## make A2C fluid
# A2C流体版
# 学習

In [1]:
#!/usr/bin/python
import numpy as np
import pandas as pd
import time
from collections import namedtuple
import matplotlib.pyplot as plt
%matplotlib inline


from collections import deque
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import gym
from gym import spaces
from gym.spaces.box import Box


import os
import subprocess
import PyFoam
import PyFoam.FoamInformation
from PyFoam.RunDictionary.SolutionDirectory import SolutionDirectory
from PyFoam.RunDictionary.ParsedParameterFile import ParsedParameterFile
from PyFoam.Basics.DataStructures import Vector
from PyFoam.Execution.BasicRunner import BasicRunner
from PyFoam.Basics.TemplateFile import TemplateFile
import shlex,sys,json
import re
from pathlib import Path

from copy import copy
import random

In [2]:
# Set the case directory.
# Single case used for trial runs.
CASE = SolutionDirectory("../aircond5/Case/case0")

In [3]:
## 実行環境の設定

class Aircond:
    '''Aircondのクラス'''
    def __init__(self, CASE, stride=500,end=3000,xCells=40,
                         insert_list = [15,15,15,15,33,33,33,51,69,69,69,87,105,105,105,142,142,142,342,342,380,380]):
        '''Build the environment around one OpenFOAM case directory.

        Runs the case's ``Makemesh`` script, reads the cell count from
        ``constant/polyMesh/neighbour``, prepares the action / observation
        placeholders and parses every regular file of the initial and
        ``system`` directories into an attribute named after the file.

        Args:
            CASE: case handle; ``CASE.name`` and ``CASE.initialDir()`` are used.
            stride: amount added to the controlDict start/end time per step.
            end: elapsed time at which ``step`` reports the episode as done.
            xCells: first dimension of every reshaped observation channel.
            insert_list: row indices handed to ``np.insert`` in
                ``make_observation`` (zero rows for obstacle positions,
                according to that method's docstring).

        Raises:
            ValueError: if no ``nCells`` entry is found in the neighbour file.
        '''
        self.CASE = CASE
        # The mesh must be generated first, otherwise constant/polyMesh does not exist.
        os.system(CASE.name + '/Makemesh')

        # Read nCells from the text of the neighbour file.  A regex is used
        # instead of a fixed-width slice so that any number of digits works
        # and a missing entry gives a clear error.
        with open(self.CASE.name + '/constant/polyMesh/neighbour') as f:
            neighbour = f.read()
        found = re.search(r'nCells\D*?(\d+)', neighbour)
        if found is None:
            raise ValueError(
                'nCells not found in ' + self.CASE.name + '/constant/polyMesh/neighbour')
        self.nCells = int(found.group(1))

        # Discrete inlet options: speed, direction (radians) and temperature (kelvin).
        self.action_SPEED = np.array([0.1,0.3,0.5,0.8,1.0])
        self.action_DIRECTION = np.array([-1*np.pi/16, -2*np.pi/16,-3*np.pi/16,-4*np.pi/16,-5*np.pi/16,-6*np.pi/16,-7*np.pi/16])
        self.action_TEMPERTURE = np.array([18+273.15,19+273.15,20+273.15,22+273.15,24+273.15,25+273.15,26+273.15])
        # One placeholder row per speed/direction/temperature combination (5*7*7 = 245).
        n_actions = (len(self.action_SPEED) * len(self.action_DIRECTION)
                     * len(self.action_TEMPERTURE))
        self.action_space = np.tile(np.array([0,0,0]),(n_actions,1))
        self.observation_space_ = np.tile(np.array([0,0,0]),(self.nCells,1))

        self.xCells = xCells
        # Copy so that instances never share (or mutate) the default list.
        self.insert_list = list(insert_list)
        observation_space = np.tile(np.array([0,0,0]), (self.nCells+len(self.insert_list),1))
        self.observation_space = np.array(
            [observation_space[:,k].reshape(self.xCells,-1) for k in range(3)])

        self.stride = stride  # width of one environment step
        # Time elapsed since stepping started (steps run beforehand are not counted).
        self.present_time = 0
        # Times mirrored into OpenFOAM's controlDict.
        self.startTime = 0
        self.endTime = copy(self.stride)
        # Elapsed time at which the episode ends.
        self.end = end

        # Collect the regular files of each dictionary directory.
        self.initialDir = self.CASE.initialDir()+'/'
        self.constant = self.CASE.name + "/constant/"
        self.system = self.CASE.name + "/system/"
        self.initialDir_file = [x for x in os.listdir(self.initialDir)
                                if os.path.isfile(self.initialDir + x)]
        self.constant_file = [y for y in os.listdir(self.constant)
                              if os.path.isfile(self.constant + y)]
        self.system_file = [z for z in os.listdir(self.system)
                            if os.path.isfile(self.system + z)]

        # Store each parsed dictionary under its own file name
        # (the ``constant`` files are listed but not parsed).
        for name in self.initialDir_file:
            self.__dict__[name] = ParsedParameterFile(self.initialDir + name)

        for name in self.system_file:
            self.__dict__[name] = ParsedParameterFile(self.system + name)
            
    def initial_to_float(self, numpy_Parsed_value):
        '''uniformをnp.arrayに変換'''
        numpy_Parsed_value = np.array(numpy_Parsed_value)
        if numpy_Parsed_value.ndim==0:
            Parsed_raw = str(numpy_Parsed_value.all())
            Parsed_str = Parsed_raw[8:].strip('()').split(' ')
            Parsed_int = np.array(list(map(float,Parsed_str)))
            #Parsed = np.tile(Parsed_int,(self.nCells,1))
        return Parsed_int
    
    def initial_to_array(self, numpy_Parsed_value):
        '''uniformをnCellの数だけnp.arrayに変換'''
        numpy_Parsed_value = np.array(numpy_Parsed_value)
        if numpy_Parsed_value.ndim==0:
            Parsed_raw = str(numpy_Parsed_value.all())
            Parsed_str = Parsed_raw[8:].strip('()').split(' ')
            Parsed_int = np.array(list(map(float,Parsed_str)))
            Parsed = np.tile(Parsed_int,(self.nCells,1))
        return Parsed

    def make_observation_old(self,Dir):
        '''Return the observation stored under the time directory ``Dir``.

        Reads ``Dir/U`` and ``Dir/T`` and returns one row per cell holding
        the first two velocity components followed by the temperature.
        '''
        velocity = np.array(ParsedParameterFile(Dir + '/U').content['internalField'])
        temperature = np.array(ParsedParameterFile(Dir + '/T').content['internalField'])
        if velocity.ndim == 0:
            # Uniform fields are expanded to one row per cell.
            velocity = self.initial_to_array(velocity)
            temperature = self.initial_to_array(temperature)
        planar_velocity = np.delete(velocity, 2, axis=1)  # drop the third component
        temperature_column = temperature.reshape((-1, 1), order='F')
        return np.concatenate((planar_velocity, temperature_column), axis=1)
    
    def make_observation_onerow(self,Dir):
        '''Return the observation of ``Dir`` as a single column.

        The first velocity component of every cell, then the second, then
        the temperature are stacked on top of each other.
        '''
        velocity = np.array(ParsedParameterFile(Dir + '/U').content['internalField'])
        temperature = np.array(ParsedParameterFile(Dir + '/T').content['internalField'])
        if velocity.ndim == 0:
            # Uniform fields are expanded to one row per cell.
            velocity = self.initial_to_array(velocity)
            temperature = self.initial_to_array(temperature)
        columns = (velocity[:, 0], velocity[:, 1], temperature)
        return np.concatenate([part.reshape(-1, 1) for part in columns], axis=0)
    
    def make_observation(self,Dir,celsius=True):
        '''Return the observation of ``Dir`` as three 2-D channels.

        Zero rows are inserted at the indices in ``self.insert_list``
        (obstacle positions), then each of the three columns (first two
        velocity components, temperature) is reshaped to
        ``(self.xCells, -1)``.

        Args:
            Dir: path of the time directory containing ``U`` and ``T``.
            celsius: when true, 273.15 is subtracted from the temperature
                before the channels are built.
        '''
        velocity = np.array(ParsedParameterFile(Dir + '/U').content['internalField'])
        temperature = np.array(ParsedParameterFile(Dir + '/T').content['internalField'])
        if velocity.ndim == 0:
            # Uniform fields are expanded to one row per cell.
            velocity = self.initial_to_array(velocity)
            temperature = self.initial_to_array(temperature)
        if celsius:
            # Celsius_ may return an object array, hence the explicit cast.
            temperature = self.Celsius_(temperature).astype(np.float64)
        per_cell = np.concatenate(
            [np.delete(velocity, 2, axis=1),
             np.reshape(temperature, [-1, 1], order='F')],
            axis=1)
        padded = np.insert(per_cell, self.insert_list, [0, 0, 0], axis=0)
        return np.array([padded[:, k].reshape(self.xCells, -1) for k in range(3)])
    
    def make_action(self):
        '''actionの設定'''
        Action = np.empty((0,3),float)
        for i in range(len(self.action_SPEED)):
            for j in range(len(self.action_DIRECTION)):
                for k in range(len(self.action_TEMPERTURE)):
                    Ux = self.action_SPEED[i]*np.cos(self.action_DIRECTION[j])
                    Uy = self.action_SPEED[i]*np.sin(self.action_DIRECTION[j])
                    Act = np.array([[Ux,Uy,self.action_TEMPERTURE[k]]])
                    Action = np.append(Action,Act,axis=0)
                    
        return Action
    
    def getParsed(self,time_step):
        '''Return ``[T, U]`` as ParsedParameterFile objects for ``time_step``.'''
        step_dir = self.CASE.name + '/' + str(time_step)
        return [ParsedParameterFile(step_dir + '/T'),
                ParsedParameterFile(step_dir + '/U')]
    
    
    def getParsedList(self,first_step, last_step, write_step,):
        '''Return ``[T, U]`` pairs for every step of ``range(first_step, last_step, write_step)``.'''
        step_dirs = (self.CASE.name + '/' + str(stp)
                     for stp in range(first_step, last_step, write_step))
        return [[ParsedParameterFile(step_dir + '/T'),
                 ParsedParameterFile(step_dir + '/U')]
                for step_dir in step_dirs]
    
    # 後にcythonで書き直す予定
    def calc_PMV(self, TA=20,VA=0.3,TR=20,RH=50,AL=1,CLO=1):
        """PMVとPPDを計算
        デフォルト値。TA,VA,TR,RHまでは入力を推奨
        TA = 20  #  温度[℃]
        VA = 0.3  # 流速[m/s]
        TR = 20  # MRT[℃]
        RH = 50  # 相対湿度[%]
        AL = 1  # 活動量[met]
        CLO = 1 # 着衣量[clo]
        
        """
        #***************************************************
        # 外部仕事 W＝0 [W/㎡]とする。
        #***************************************************
        # PMV 計算準備
        #
        M = AL * 58.15
        LCL = CLO
        W = 0
        #PA = (RH / 100 * np.exp(18.6686 - 4030.18 / (TA + 235))) / 0.00750062
        PPK = 673.4 - 1.8 * TA
        PPA = 3.2437814 + 0.00326014 * PPK + 2.00658 * 1E-9 * PPK * PPK * PPK
        PPB = (1165.09 - PPK) * (1 + 0.00121547 * PPK)
        PA = RH / 100 * 22105.8416 / np.exp(2.302585 * PPK * PPA / PPB) * 1000
        EPS = 1E-5
        MW = M - W
        # FCL＝着衣表面積／裸体表面積の比
        if LCL > 0.5:
            FCL = 1.05 + 0.1 * LCL
        else:
            FCL = 1 + 0.2 * LCL
        # 衣服表面温度TCLの初期値設定
        TCL = TA
        TCLA = TCL
        NOI = 1
        # 着衣表面温度の計算
        while True:
            TCLA = 0.8 * TCLA + 0.2 * TCL
            HC = 12.1 * np.sqrt(VA)
            if 2.38 * np.sqrt(np.sqrt(abs(TCL - TA))) > HC:
                HC = 2.38 * np.sqrt(np.sqrt(abs(TCL - TA)))
            TCL = 35.7 - 0.028 * MW - 0.155 * LCL * (3.96 * 1E-8 * FCL * ((TCLA + 273) ** 4 - (TR + 273) ** 4) + FCL * HC * (TCLA - TA))
            NOI = NOI + 1
            if NOI > 150:
                #PMV = 999990.999
                PMB = 3.0
                PPD = 100
                return (PMV,PPD)
            if not abs(TCLA - TCL) > EPS:
                break
        #PMVの計算
        PM1 = 3.96 * 1E-8 * FCL * ((TCL + 273) ** 4 - (TA + 273) ** 4)
        PM2 = FCL * HC * (TCL - TA)
        PM3 = 0.303 * np.exp(-0.036 * M) + 0.028
        if MW > 58.15:
            PM4 = 0.42 * (MW - 58.15)
        else:
            PM4 = 0
        PMV = PM3 * (MW - 3.05 * 0.001 * (5733 - 6.99 * MW - PA) - PM4 - 1.7 * 1E-5 * M * (5867 - PA) - 0.0014 * M * (34 - TA) - PM1 - PM2)
            #PRINT PMV
        if abs(PMV) > 3:
            #PMV = 999990.999
            PMV = 3.0
            PPD = 100
            return (PMV,PPD)
        
        PPD = 100 - 95 * np.exp(-0.0335 * PMV ** 4 - 0.2179 * PMV ** 2)
        
        return (PMV,PPD)
    
    def calc_MRT(self, T_Parsed):
        '''MRTを計算'''
        
        T_wall_list = np.array([])
        if np.array(T_Parsed['internalField']).ndim==0:  # time_step=0
            for boundary in list(T_Parsed['boundaryField']):
                if T_Parsed['boundaryField'][boundary]['type']=='zeroGradient' or \
                T_Parsed['boundaryField'][boundary]['type']=='empty' or \
                    T_Parsed['boundaryField'][boundary]['type']=='fixedValue':
                    T_wall = np.array([])
                else:
                    numpy_Parsed_value = np.array(T_Parsed['boundaryField'][boundary]['value'])
                    T_wall = self.initial_to_float(numpy_Parsed_value)
                T_wall_list = np.append(T_wall_list, T_wall)
                
        else:
            for boundary in list(T_Parsed['boundaryField']):
                if T_Parsed['boundaryField'][boundary]['type']=='fixedValue':
                    numpy_Parsed_value = np.array(T_Parsed['boundaryField'][boundary]['value'])
                    T_wall = self.initial_to_float(numpy_Parsed_value)
                elif T_Parsed['boundaryField'][boundary]['type']=='zeroGradient' or \
                T_Parsed['boundaryField'][boundary]['type']=='empty':
                    T_wall = np.array([])
                else:
                    T_wall = np.array(T_Parsed['boundaryField'][boundary]['value'])
                    if T_wall.ndim==0:
                        T_wall = self.initial_to_float(T_wall)
                T_wall_list = np.append(T_wall_list, T_wall)
        return np.average(T_wall_list)
    
    def Celsius(self, T):
        CelsiusT = T - 273.15
        return CelsiusT
    
    def Celsius_(self, T):
        '''np.arrayの配列をセルシウス℃に変換'''
        if np.array(T).size==1:
            return self.Celsius(T)
        else:
            Celsiuss = np.frompyfunc(self.Celsius,1,1)  # リストに適用可にする
            return Celsiuss(T)
        
    def UScalar(self, U):
        '''Uをスカラーに変換'''
        if np.array(U).size<=3:
            return np.array([np.sqrt(U[0]**2 + U[1]**2)])
        else:
            return np.sqrt(U[:,0]**2 + U[:,1]**2)
        
    def calc_PMV_all(self, TU_Parsed,RH=50,AL=1,CLO=1):
        '''PMVを一つのtime_stepで全点計算
        TU_Parsed : TとUのParsedParameterFileをリストにしたもの
        全ての点のPMVとPPVの値を返す
        time=0でも、すべてのセルの値を返す。'''
        T_Parsed,U_Parsed = TU_Parsed
        T = np.array(T_Parsed['internalField'])
        U = np.array(U_Parsed['internalField'])
        # time_step==0の場合
        if T.ndim==0 or U.ndim==0:
            T = self.initial_to_float(T)
            U = self.initial_to_float(U)
            # Uを速さに変換
            Us = self.UScalar(U)
            MRT = self.calc_MRT(T_Parsed)
            # TとMRTをセルシウス温度に変換
            Tc = self.Celsius_(T)
            MRTc = self.Celsius_(MRT)
            pmv,ppd = self.calc_PMV(TA=Tc,VA=Us,TR=MRTc,RH=RH,AL=AL,CLO=CLO)
            PMV = np.tile(pmv, self.nCells)
            PPD = np.tile(ppd, self.nCells)
        else:   
            # Uを速さに変換
            Us = self.UScalar(U)
            MRT = self.calc_MRT(T_Parsed)
            # TとMRTをセルシウス温度に変換
            Tc = list(self.Celsius_(T))
            MRTc = self.Celsius_(MRT)
            
            length = len(T)
            # ループを早くするため、外に出す。
            PMV = []
            PPD = []
            PMVappend = PMV.append
            PPDappend = PPD.append
            for i in range(length):
                pmv,ppd = self.calc_PMV(TA=Tc[i],VA=Us[i],TR=MRTc,RH=RH,AL=AL,CLO=CLO)
                PMVappend(pmv)
                PPDappend(ppd)
            PMV = np.array(PMV)
            PPD = np.array(PPD)
        return [PMV,PPD]
    
    def calc_PMV_error(self, TU_Parsed,RH=50,AL=1,CLO=1):
        """PMVの全点の2条誤差の合計を計算
        入力はcalc_PMV_allと同じ。返すものだけが違う。
        PMVは、0との2乗誤差、PPDは0との、根平均2乗誤差を返す。
        """
        PMV, PPD = self.calc_PMV_all(TU_Parsed, RH=RH,AL=AL,CLO=CLO)
        PMV_mae = ((PMV - 0)**2).mean()
        PPD_rmse = np.sqrt( ((PPD - 0)**2).mean())
        return PMV_mae, PPD_rmse
    
    def header(self, time_step, filename):
        '''headerファイルを作成'''
        header = """/*--------------------------------*- C++ -*----------------------------------*\
=========                 |
  \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox
   \\    /   O peration     | Website:  https://openfoam.org
    \\  /    A nd           | Version:  6
     \\/     M anipulation  |
\*---------------------------------------------------------------------------*/
FoamFile
{{
    version     2.0;
    format      ascii;
    class       volScalarField;
    location    "{}";
    object      {};
}}
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
""".format(time_step, filename)
        return header
    
    def internal(self, list_internal):
        '''internalFieldの値の作成'''
        if len(list_internal)==1:
            internal = """
internalField   uniform {};""".format(list_internal[0])
        else:
            str_= np.frompyfunc(str,1,1)
            str_internal = '\n'.join(str_(list_internal))
            internal = """
internalField   nonuniform List<scalar> 
{}
(
{}
)
;
""".format(self.nCells, str_internal)
        return internal
    
    def makePMVFile(self,time_step):
        '''PMVとPPDファイルを書き込む'''
        
        path_pmv = self.CASE.name + '/' + str(time_step) + '/PMV' # 書き込むパス
        path_ppd = self.CASE.name + '/' + str(time_step) + '/PPD'
        
        demensions = """
dimensions      [0 0 0 0 0 0 0];
"""
        
        boundary = """
boundaryField
{
    ".*"
    {
        type            zeroGradient;
    }
}


// ************************************************************************* //
"""
        # header, dimensions, internal, boundaryの順に書き込む
        f = open(path_pmv, 'w') # ファイルを開く(該当ファイルがなければ新規作成)
        g = open(path_ppd, 'w')
        f.write(self.header(time_step,"PMV")) # headerを記載する
        g.write(self.header(time_step,"PPD"))
        f.write(demensions) # dimensionsを記載する
        g.write(demensions)
        # internalFieldの計算
        TU_Parsed = self.getParsed(time_step)
        PMV,PPD = self.calc_PMV_all(TU_Parsed)
        internal_PMV = self.internal(PMV)
        internal_PPD = self.internal(PPD)
        f.write(internal_PMV)  
        g.write(internal_PPD)
        f.write(boundary)
        g.write(boundary)
        f.close() 
        g.close()

        
    def makePMVList(self,first_step, last_step, write_step):
        '''Write the PMV/PPD files for every step of ``range(first_step, last_step, write_step)``.'''
        for stp in range(first_step, last_step, write_step):
            self.makePMVFile(stp)
            
        
    def meshNumberFile(self,time_step):
        '''メッシュの並びを確認する'''
        path_mesh = self.CASE.name + '/' + str(time_step) + '/Meshnumber' # 書き込むパス


        demensions = """
dimensions      [0 0 0 0 0 0 0];
"""
        boundary = """
boundaryField
{
    ".*"
    {
        type            zeroGradient;
    }
}


// ************************************************************************* //
"""
        f = open(path_mesh, 'w') # ファイルを開く(該当ファイルがなければ新規作成)
        f.write(self.header(time_step,"PMV")) # headerを記載する
        f.write(demensions) # dimensionsを記載する
        mesh_list = [x for x in range(1,self.nCells+1)]
        internal_mesh = self.internal(mesh_list)
        f.write(internal_mesh)  
        f.write(boundary)
        f.close() 
            
    def calc_ADPI(self,TU_Parsed,occupied_zone_cell):
        '''ADPIを計算する'''
        
        # occupied_zone_cellはaircond5の場合は1~340までのセルが居住域
        T_Parsed,U_Parsed = TU_Parsed
        T = np.array(T_Parsed['internalField'])
        U = np.array(U_Parsed['internalField'])
        # time_step==0の場合
        if T.ndim==0 or U.ndim==0:
            T = self.initial_to_float(T)
            U = self.initial_to_float(U)
        
        Tc = np.average(T)  # 室内の平均温度
        Us = self.UScalar(U)  # 流速
        theta = (T - Tc) - 8.0*(Us - 0.15)  # 有効ドラフト温度
        
        satisfy_theta = np.where((theta > -1.5) & (theta < 1), 1, 0)
        satisfy_Us = np.where(Us < 0.35,1, 0)  # 条件を満たすものを1,満たさないものを0
        satisfy_all = satisfy_theta + satisfy_Us
        satisfy = satisfy_all[:occupied_zone_cell]
        nCells = satisfy.size
        num_satisfy = np.sum(satisfy == 2)
        ADPI = num_satisfy/nCells*100
        
        return (ADPI, theta)
    
    def calc_EUC(self,T_Parsed, occupied_zone_cell,last_cell):
        '''EUCを計算する'''
        
        T = np.array(T_Parsed['internalField'])
        T0 = self.initial_to_float(T_Parsed['boundaryField']['inlet']['value'])[0] # 給気温度

        if T.ndim==0:
            T = self.initial_to_float(T)[0]
            Toz = T
            Tiz = T
        else:
            Toz = np.average(T[occupied_zone_cell:last_cell])  # 居住域外の平均温度  
            Tiz = np.average(T[:occupied_zone_cell])  # 居住域内の平均温度
        EUC = (Toz-T0) / (Tiz-T0) * 100
        return EUC
        
    def getPMVList(self, first_step, last_step, write_step):
        '''任意の範囲のPMVの平均値ファイルを取得'''
        
        # ループを早くするため、外に出す。
        PMV_list = []
        PPD_list = []
        PMVappend = PMV_list.append
        PPDappend = PPD_list.append
        for stp in range(first_step, last_step, write_step):
            TU_Parsed = self.getParsed(stp)
            PMV,PPD = self.calc_PMV_all(TU_Parsed)
            pmv = np.average(np.array(PMV))
            ppd = np.average(np.array(PPD))
            PMVappend(pmv)
            PPDappend(ppd)
        return [PMV_list, PPD_list]
    
    
    def getADPIList(self, first_step, last_step, write_step,occupied_zone_cell=342):
        '''任意の範囲のADPIの値を取得'''
        
        ADPI_list = []
        ADPIappend = ADPI_list.append
        for stp in range(first_step, last_step, write_step):
            TU_Parsed = self.getParsed(stp)
            adpi,theta = self.calc_ADPI(TU_Parsed, occupied_zone_cell)
            ADPIappend(adpi)
        return ADPI_list
    
    def getEUCList(self, first_step, last_step, write_step,
                    occupied_zone_cell=342, last_cell=100000):
        '''任意の範囲のEUCの値を算出'''
        
        EUC_list = []
        EUCappend = EUC_list.append
        for stp in range(first_step, last_step, write_step):
            T_Parsed,U_Parsed = self.getParsed(stp)
            euc = self.calc_EUC(T_Parsed, occupied_zone_cell, last_cell)
            EUCappend(euc)
        return EUC_list
    
    def getTUList(self, first_step, last_step, write_step):
        '''任意の範囲のTとUの平均値を取得'''
        
        T_list = []
        U_list = []
        MRT_list = []
        Tappend = T_list.append
        Uappend = U_list.append
        MRTappend = MRT_list.append
        for stp in range(first_step, last_step, write_step):
            T_Parsed, U_Parsed = self.getParsed(stp)
            T = np.array(T_Parsed['internalField'])
            U = np.array(U_Parsed['internalField'])
            # time_step==0の場合
            if T.ndim==0 or U.ndim==0:
                T = self.initial_to_float(T)
                U = self.initial_to_float(U)
            # Uを速さに変換
            T = np.average(T)
            Us = np.average(np.array(self.UScalar(U)))
            MRT = np.average(np.array(self.calc_MRT(T_Parsed)))
            # TとMRTをセルシウス温度に変換
            Tc = self.Celsius(T)
            MRTc = self.Celsius(MRT)
            Tappend(Tc)
            Uappend(Us)
            MRTappend(MRTc)
        return [T_list,U_list,MRT_list]
        
        
        
    def change_control(self,control):
        '''Apply one of four mesh-resolution / time-step presets.

        For ``control`` 1-4 the third entry of ``blockMeshDict['blocks']`` is
        replaced, blockMeshDict is written, and ``deltaT`` is set on the
        in-memory controlDict (controlDict is not written here).  Any other
        value does nothing.
        '''
        presets = (
            (1, (20, 10, 1), 0.02),
            (2, (40, 20, 1), 0.02),
            (3, (20, 10, 1), 0.01),
            (4, (40, 20, 1), 0.01),
        )
        for key, cells, delta_t in presets:
            if control == key:
                self.blockMeshDict['blocks'][2] = Vector(*cells)
                self.blockMeshDict.writeFile()
                self.controlDict['deltaT'] = delta_t
            
    def write_interval(self, writeInterval):
        '''Set ``writeInterval`` on the in-memory controlDict (the file is not written here).'''
        self.controlDict['writeInterval'] = writeInterval
        
        
    def reset(self):
        '''Reset the environment and return the initial observation.

        Rewinds the time bookkeeping, rewrites startTime/endTime in
        controlDict, re-runs ``Makemesh``, sets a random uniform initial
        temperature and rebuilds the action table.
        '''
        # Rewind the time bookkeeping.
        self.present_time = 0
        self.startTime = 0
        self.endTime = copy(self.stride)

        # Mirror it into controlDict.
        control = ParsedParameterFile(self.CASE.controlDict())
        control['startTime'] = self.startTime
        control['endTime'] = self.endTime
        control.writeFile()

        os.system(self.CASE.name + '/Makemesh')

        # Random initial condition: an integer temperature drawn from
        # [26+273, 35+273) kelvin (upper bound excluded).
        initial_T = ParsedParameterFile(self.CASE.initialDir() + '/T')
        random_T = np.random.randint(26+273,35+273)
        initial_T['internalField'].setUniform(random_T)
        initial_T.writeFile()

        # Build the action table and the first observation.
        self.action_space = self.make_action()
        self.observation = self.make_observation(self.CASE.initialDir())
        return self.observation
    
    def step_old(self, action):
        '''Advance the simulation by one stride (variant without reward).

        Args:
            action: row index into ``self.action_space``.

        Returns:
            ``(observation, done, runOK)``.  Once ``present_time`` has reached
            ``end`` the previous observation is returned with ``done=True``
            and ``runOK='end'``; nothing is run.
        '''
        if self.present_time >= self.end:
            return (self.observation, True, 'end')

        # Apply the chosen action as the inlet boundary condition.
        latest_U = ParsedParameterFile(self.CASE.latestDir() + '/U')
        latest_T = ParsedParameterFile(self.CASE.latestDir() + '/T')
        self.act = self.action_space[action]
        inlet_velocity = Vector(self.act[0],self.act[1],0)
        latest_U['boundaryField']['inlet']['value'].setUniform(inlet_velocity)
        latest_U.writeFile()
        latest_T['boundaryField']['inlet']['value'].setUniform(self.act[2])
        latest_T.writeFile()

        # Run the OpenFOAM solver on this case.
        solver = BasicRunner(shlex.split("buoyantPimpleFoam -case " + self.CASE.name),silent=True)
        self.summary = solver.start()
        runOK = solver.runOK()

        # Move the controlDict time window forward by one stride.
        self.present_time += self.stride
        control = ParsedParameterFile(self.CASE.controlDict())
        self.startTime += self.stride
        self.endTime += self.stride
        control['startTime'] = self.startTime
        control['endTime'] = self.endTime
        control.writeFile()

        self.startTime = control['startTime']
        self.endTime = control['endTime']

        self.observation = self.make_observation(self.CASE.latestDir())

        return (self.observation, False, runOK)
    
    
    def step(self, action, reward='PMV'):
        '''Advance the simulation by one stride and compute the reward.

        Args:
            action: row index into ``self.action_space``.
            reward: name of the reward type; currently not read, the reward
                is always derived from the PMV error.

        Returns:
            ``(observation, reward, done, runOK)``.  Once ``present_time`` has
            reached ``end`` the observation and reward of the previous step
            are returned with ``done=True`` and ``runOK='end'``.
        '''
        if self.present_time >= self.end:
            # Reuse the observation and reward of the previous step.
            return (self.observation, self.reward, True, 'end')

        # Apply the chosen action as the inlet boundary condition.
        latest_U = ParsedParameterFile(self.CASE.latestDir() + '/U')
        latest_T = ParsedParameterFile(self.CASE.latestDir() + '/T')
        self.act = self.action_space[action]
        inlet_velocity = Vector(self.act[0],self.act[1],0)
        latest_U['boundaryField']['inlet']['value'].setUniform(inlet_velocity)
        latest_U.writeFile()
        latest_T['boundaryField']['inlet']['value'].setUniform(self.act[2])
        latest_T.writeFile()

        # Run the OpenFOAM solver on this case.
        solver = BasicRunner(shlex.split("buoyantPimpleFoam -case " + self.CASE.name),silent=True)
        self.summary = solver.start()
        runOK = solver.runOK()

        # Move the controlDict time window forward by one stride.
        self.present_time += self.stride
        control = ParsedParameterFile(self.CASE.controlDict())
        self.startTime += self.stride
        self.endTime += self.stride
        control['startTime'] = self.startTime
        control['endTime'] = self.endTime
        control.writeFile()

        self.startTime = control['startTime']
        self.endTime = control['endTime']

        self.observation = self.make_observation(self.CASE.latestDir())

        # The reward is computed from the raw fields, not from the
        # observation, because the observation contains inserted zero rows.
        new_T = ParsedParameterFile(self.CASE.latestDir() + '/T')
        new_U = ParsedParameterFile(self.CASE.latestDir() + '/U')
        PMV_mae, PPD_rmse = self.calc_PMV_error([new_T, new_U], RH=50,AL=1,CLO=1)
        # The mean squared PMV is a penalty, offset by +1 so that a small
        # deviation yields a positive reward.
        self.reward = -PMV_mae + 1

        return (self.observation, self.reward, False, runOK)
        

In [4]:
# aircondを並列でたくさんつくるためのクラス

# ケースの作成
def makecase(NUM_PROCESSES,casename='Case',stride=500, end=3000, xCells=40,
                         insert_list = [15,15,15,15,33,33,33,51,69,69,69,87,105,105,105,142,142,142,342,342,380,380]):
    """Create ``NUM_PROCESSES`` cases and return one Aircond environment per case.

    Runs ``./makecase NUM_PROCESSES casename`` and then wraps each
    ``./<casename>/case<i>`` directory.

    xCells : number of cells in the x direction
    insert_list : cell positions that are zero-filled (obstacles)
    """
    os.system("./makecase {} {}".format(NUM_PROCESSES, casename))
    return [
        Aircond(SolutionDirectory("./{}/case{}".format(casename, i)),
                stride=stride, end=end, xCells=xCells, insert_list=insert_list)
        for i in range(NUM_PROCESSES)
    ]

In [5]:
# Constants

#ENV_NAME = 'BreakoutNoFrameskip-v4' 
# (Leftover from the Atari example this notebook was adapted from:
# BreakoutNoFrameskip-v4 was used there instead of Breakout-v0, which skips
# a random 2-4 frames automatically.)
# Reference: https://becominghuman.ai/lets-build-an-atari-ai-part-1-dqn-df57e8ff3b26
# https://github.com/openai/gym/blob/5cb12296274020db9bb6378ce54276b31e7002da/gym/envs/__init__.py#L371
    
#NUM_SKIP_FRAME = 4 # number of frames to skip  # not used
NUM_STACK_FRAME = 1  # number of consecutive frames kept as one state
#NOOP_MAX = 30  #  upper bound of the random number of no-op frames inserted at reset
NUM_PROCESSES = 2 #  number of environments run in parallel
NUM_ADVANCED_STEP = 3  # number of steps advanced before the discounted return is computed
GAMMA = 0.90  # time discount rate

TOTAL_FRAMES=10e6  #  total number of frames used for training
NUM_UPDATES = int(TOTAL_FRAMES / NUM_ADVANCED_STEP / NUM_PROCESSES)  # total number of network updates
# With the values above NUM_UPDATES evaluates to 1,666,666.


In [6]:
#NUM_UPDATES = 100000
# Environment settings; the names match the makecase / Aircond parameters
# stride, end, xCells and insert_list.
STRIDE = 300
END = 3000
XCELLS = 40
INSERT_LIST = [15,15,15,15,33,33,33,51,69,69,69,87,105,105,105,142,142,142,342,342,380,380]

In [7]:
# Constants used in the A2C loss
value_loss_coef = 0.5
entropy_coef = 0.01
max_grad_norm = 0.5

# Settings for the RMSprop optimiser
lr = 7e-4
eps = 1e-5
alpha = 0.99


In [8]:
# Use the GPU when CUDA is available, otherwise the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print(device)


cuda


In [9]:
# メモリオブジェクトの定義


class RolloutStorage(object):
    '''Rollout memory used for advantage (n-step) learning.

    All tensors live on the module-level ``device``.  ``observations``,
    ``masks`` and ``returns`` hold ``num_steps + 1`` entries; ``rewards`` and
    ``actions`` hold ``num_steps`` entries.
    '''

    def __init__(self, num_steps, num_processes, obs_shape):
        '''Allocate storage for ``num_steps`` transitions of ``num_processes`` environments.

        ``obs_shape`` is unpacked into the trailing observation dimensions,
        e.g. ``(4, 84, 84)`` -> ``4, 84, 84``.
        '''
        # Kept so that insert() wraps on this storage's own length.
        self.num_steps = num_steps

        self.observations = torch.zeros(
            num_steps + 1, num_processes, *obs_shape).to(device)

        self.masks = torch.ones(num_steps + 1, num_processes, 1).to(device)
        self.rewards = torch.zeros(num_steps, num_processes, 1).to(device)
        self.actions = torch.zeros(
            num_steps, num_processes, 1).long().to(device)

        # Discounted returns.
        self.returns = torch.zeros(num_steps + 1, num_processes, 1).to(device)
        self.index = 0  # position of the next insert

    def insert(self, current_obs, action, reward, mask):
        '''Store one transition at the current index and advance the index.'''
        self.observations[self.index + 1].copy_(current_obs)
        self.masks[self.index + 1].copy_(mask)
        self.rewards[self.index].copy_(reward)
        self.actions[self.index].copy_(action)

        # Wrap on the size given to the constructor rather than on a global
        # constant, so the buffer stays consistent for any num_steps.
        self.index = (self.index + 1) % self.num_steps

    def after_update(self):
        '''After an update, carry the newest observation and mask over to index 0.'''
        self.observations[0].copy_(self.observations[-1])
        self.masks[0].copy_(self.masks[-1])

    def compute_returns(self, next_value):
        '''Compute the discounted return of every stored step.

        The computation runs backwards from the last step, bootstrapping
        from ``next_value``; a zero mask cuts the return at that step.
        '''
        self.returns[-1] = next_value
        for ad_step in reversed(range(self.rewards.size(0))):
            self.returns[ad_step] = self.returns[ad_step + 1] * \
                GAMMA * self.masks[ad_step + 1] + self.rewards[ad_step]


In [10]:
# A2Cのディープ・ニューラルネットワークの構築


def init(module, gain):
    """Initialise a layer in place and return it.

    The weight gets an orthogonal initialisation scaled by ``gain``; the bias
    is set to zero. Returning the module lets the call be used inline while
    building a network.
    """
    weights = module.weight.data
    nn.init.orthogonal_(weights, gain=gain)
    biases = module.bias.data
    nn.init.constant_(biases, 0)
    return module


class Flatten(nn.Module):
    """Layer that collapses every dimension after the first into one."""

    def forward(self, x):
        batch = x.size(0)  # keep the leading (batch) dimension as is
        return x.view(batch, -1)


class Net(nn.Module):
    """Actor-critic network: a shared convolutional trunk with two linear heads.

    The trunk maps a 3-channel input to a 512-dimensional feature vector.
    ``critic`` maps the features to a single state value and ``actor`` maps
    them to ``n_out`` action logits.
    """

    def __init__(self, n_out):
        """Build the layers.

        Args:
            n_out: number of discrete actions (size of the actor output).
        """
        super().__init__()

        relu_gain = nn.init.calculate_gain('relu')

        def relu_init(layer):
            # Orthogonal weights scaled for a following ReLU, zero bias.
            return init(layer, gain=relu_gain)

        # Shared feature extractor. No padding is used, so each convolution
        # gives: out = (in - kernel) // stride + 1 per spatial dimension.
        # The final Linear expects 64 * 2 * 16 inputs, which matches a
        # 40 x 12 spatial input: 40x12 -> 18x4 -> 17x3 -> 16x2.
        trunk = [
            relu_init(nn.Conv2d(3, 32, kernel_size=5, stride=2)),
            nn.ReLU(),
            relu_init(nn.Conv2d(32, 64, kernel_size=2, stride=1)),
            nn.ReLU(),
            relu_init(nn.Conv2d(64, 64, kernel_size=2, stride=1)),
            nn.ReLU(),
            Flatten(),  # (batch, 64, H, W) -> (batch, 64 * H * W)
            relu_init(nn.Linear(64 * 2 * 16, 512)),
            nn.ReLU(),
        ]
        self.conv = nn.Sequential(*trunk)

        # Critic head: one scalar state value per input.
        self.critic = init(nn.Linear(512, 1), gain=1.0)

        # Actor head: one logit per action; the small gain keeps the initial
        # logits close to zero.
        self.actor = init(nn.Linear(512, n_out), gain=0.01)

        # Put the network in training mode.
        self.train()

    def forward(self, x):
        """Return ``(state_value, action_logits)`` for the batch ``x``.

        The input is fed to the trunk as is; no normalisation is applied.
        """
        features = self.conv(x)
        state_value = self.critic(features)
        logits = self.actor(features)
        return state_value, logits

    def act(self, x):
        """Sample one action per row of ``x`` from the softmax policy."""
        _, logits = self(x)
        policy = F.softmax(logits, dim=1)  # dim=1 runs over the actions
        return policy.multinomial(num_samples=1)

    def get_value(self, x):
        """Return the critic's state value for ``x``."""
        state_value, _ = self(x)
        return state_value

    def evaluate_actions(self, x, actions):
        """Evaluate the actions that were taken in states ``x``.

        Returns:
            A tuple ``(value, action_log_probs, dist_entropy)``: the state
            values, the log-probability of each given action, and the policy
            entropy averaged over the batch.
        """
        state_value, logits = self(x)

        log_policy = F.log_softmax(logits, dim=1)  # dim=1 runs over the actions
        # Pick out the log-probability of the action actually taken.
        taken_log_probs = log_policy.gather(1, actions)

        policy = F.softmax(logits, dim=1)
        entropy = -(log_policy * policy).sum(-1).mean()

        return state_value, taken_log_probs, entropy


In [11]:
# エージェントが持つ頭脳となるクラスを定義、全エージェントで共有する


class Brain(object):
    """Learner shared by all agents: owns the network and its optimizer."""

    def __init__(self, actor_critic):
        """Wrap ``actor_critic`` and create its RMSprop optimizer.

        Args:
            actor_critic: the actor-critic network (an instance of ``Net``).

        The optimizer settings ``lr``, ``eps`` and ``alpha`` are read from
        module scope.
        """
        self.actor_critic = actor_critic

        # To resume from saved weights, load a state_dict into actor_critic
        # here, e.g.:
        #   param = torch.load('weight.pth', map_location='cpu')
        #   self.actor_critic.load_state_dict(param)

        self.optimizer = optim.RMSprop(
            actor_critic.parameters(), lr=lr, eps=eps, alpha=alpha)

    def update(self, rollouts):
        """Run one A2C gradient step using every transition in ``rollouts``.

        Args:
            rollouts: a ``RolloutStorage`` whose ``returns`` have already been
                filled in by ``compute_returns``.

        The loss is
        ``value_loss * value_loss_coef - action_gain - entropy * entropy_coef``
        with the coefficients and ``max_grad_norm`` read from module scope.
        """
        obs_shape = rollouts.observations.size()[2:]
        # Take the rollout dimensions from the buffer itself rather than from
        # the NUM_ADVANCED_STEP / NUM_PROCESSES globals, so the reshape below
        # always matches the data that was actually stored.
        num_steps, num_processes = rollouts.rewards.size()[:2]

        # Evaluate all stored steps in a single batch of
        # num_steps * num_processes rows. The last observation slot is
        # excluded because no action was taken from it.
        values, action_log_probs, dist_entropy = self.actor_critic.evaluate_actions(
            rollouts.observations[:-1].view(-1, *obs_shape),
            rollouts.actions.view(-1, 1))

        # Back to (num_steps, num_processes, 1) to line up with the returns.
        values = values.view(num_steps, num_processes, 1)
        action_log_probs = action_log_probs.view(num_steps, num_processes, 1)

        advantages = rollouts.returns[:-1] - values
        value_loss = advantages.pow(2).mean()

        # detach() makes the advantage a constant for the policy term, so this
        # term only sends gradients through the log-probabilities.
        action_gain = (advantages.detach() * action_log_probs).mean()

        total_loss = (value_loss * value_loss_coef -
                      action_gain - dist_entropy * entropy_coef)

        self.optimizer.zero_grad()  # clear old gradients
        total_loss.backward()  # back-propagate
        # Clip the gradient norm so a single update cannot change the
        # parameters too much.
        nn.utils.clip_grad_norm_(self.actor_critic.parameters(), max_grad_norm)

        self.optimizer.step()  # apply the update


In [12]:
def resets(Envs):
    """Reset every environment and return the observations as one NumPy array.

    Environments are reset in list order; row ``i`` of the result is the
    value returned by ``Envs[i].reset()``.
    """
    initial_obs = [env.reset() for env in Envs]
    return np.array(initial_obs)

In [13]:
def steps(Envs, action):
    """Advance every environment by one step.

    ``Envs[i]`` is stepped with ``action[i]``. Each ``step`` call must return
    the 4-tuple ``(obs, reward, done, runOK)``.

    Returns:
        ``(obs, reward, done, runOK)`` where ``obs`` is a NumPy array of the
        per-environment observations and the other three are plain lists.
    """
    obs_batch, reward, done, runOK = [], [], [], []
    for idx, env in enumerate(Envs):
        ob, rew, finished, ok = env.step(action[idx])
        obs_batch.append(ob)
        reward.append(rew)
        done.append(finished)
        runOK.append(ok)
    return np.array(obs_batch), reward, done, runOK
    

In [14]:
def make_random_actions(Envs, max_execution=3):
    """Draw a random warm-up action sequence for every environment.

    The sequences are meant to advance the environments by different amounts
    so they do not all start in the same state.

    Args:
        Envs: list of environments; ``Envs[0].action_space.shape[0]`` gives
            the number of selectable actions.
        max_execution: largest sequence length; each environment gets between
            0 and ``max_execution`` actions (inclusive).

    Returns:
        A complex128 array with one row per environment. Real entries are
        action indices; shorter rows are right-padded with ``1j``, so a
        non-zero imaginary part marks a padding slot.
    """
    n_choices = Envs[0].action_space.shape[0]

    # For each environment the length is drawn first, then its actions.
    sequences = [
        [random.randint(0, n_choices - 1)
         for _ in range(random.randint(0, max_execution))]
        for _ in Envs
    ]

    width = max(len(seq) for seq in sequences)
    padded = [seq + [1j] * (width - len(seq)) for seq in sequences]
    return np.array(padded).astype(np.complex128)

In [15]:
def random_steps(Envs, random_actions, step_reset=True):
    """Advance each environment through its own row of ``random_actions``.

    Args:
        Envs: list of environments.
        random_actions: 2-D complex array, one row per environment to advance
            (as built by ``make_random_actions``). Entries with a zero
            imaginary part are action indices; entries with a non-zero
            imaginary part are padding and are skipped.
        step_reset: when true, set ``present_time`` to 0 on every advanced
            environment afterwards.

    Returns:
        ``(obs, reward, done, runOK)``, one entry per row of
        ``random_actions``:
        obs: NumPy array of the observation after the last real step, or
            ``env.observation`` when the row held no real step.
        reward: list with the reward of the last real step, or
            ``env.reward`` when the row held no real step.
        done: list of bools, True when any step reported done.
        runOK: list of bools, False when any step reported a failed run.
    """
    obs = []
    reward = []
    done = []
    runOK = []

    # Iterate over the rows of random_actions rather than over Envs, so a
    # caller can advance only the first few environments by passing fewer rows.
    n_rows = random_actions.shape[0]
    for i in range(n_rows):
        env = Envs[i]
        stepped = False
        last_obs = last_reward = None
        any_done = False
        all_ok = True

        for entry in random_actions[i]:
            if entry.imag != 0:  # padding slot, not a real action
                continue
            # step() returns (obs, reward, done, runOK) -- the same order that
            # steps() unpacks. Reward and done used to be swapped here.
            last_obs, last_reward, done_, runOK_ = env.step(int(entry.real))
            stepped = True
            any_done = any_done or bool(done_)
            all_ok = all_ok and bool(runOK_)

        if not stepped:
            # Nothing was executed: report the environment's current state.
            last_obs = env.observation
            last_reward = env.reward

        obs.append(last_obs)
        reward.append(last_reward)
        done.append(any_done)
        runOK.append(all_ok)

    obs = np.array(obs)

    if step_reset:
        for i in range(n_rows):
            Envs[i].present_time = 0

    return obs, reward, done, runOK

In [16]:
# A2C training run, fluid-simulation version.

# Seed PyTorch's random number generators.
seed_num = 1
torch.manual_seed(seed_num)
if use_cuda:
    torch.cuda.manual_seed(seed_num)

# Build the execution environments.
# Use one intra-op thread. This is a literal 1 rather than seed_num so that
# changing the seed does not silently change the thread count.
torch.set_num_threads(1)
Envs = makecase(NUM_PROCESSES, stride=STRIDE, end=END,
                xCells=XCELLS, insert_list=INSERT_LIST)

# Create the Brain shared by all agents.
n_out = Envs[0].action_space.shape[0]  # number of discrete actions (actor output size)
actor_critic = Net(n_out).to(device)  # move the network to the selected device
global_brain = Brain(actor_critic)

# Buffers.
# Observations are used one at a time (no frame stacking), so the rollout
# storage uses the environment's observation shape unchanged.
obs_shape = Envs[0].observation_space.shape  # expected to be (3, 40, 12)
rollouts = RolloutStorage(
    NUM_ADVANCED_STEP, NUM_PROCESSES, obs_shape)
episode_rewards = torch.zeros([NUM_PROCESSES, 1])  # running reward sum of the current episode
final_rewards = torch.zeros([NUM_PROCESSES, 1])  # reward sum of the last finished episode

# Initial state.
obs = resets(Envs)
obs = torch.from_numpy(obs).float()
current_obs = obs.to(device)

# Store the initial observation in slot 0 of the rollout buffer.
rollouts.observations[0].copy_(current_obs)

# Periodic checkpoints are written to ./model; make sure it exists.
os.makedirs('./model', exist_ok=True)

# Main loop.
for j in tqdm(range(NUM_UPDATES)):
    # Collect NUM_ADVANCED_STEP transitions before each update.
    for step in range(NUM_ADVANCED_STEP):

        # Choose actions; no gradients are needed while acting.
        with torch.no_grad():
            action = actor_critic.act(rollouts.observations[step])

        cpu_actions = action.squeeze(1).cpu().numpy()  # tensor -> NumPy

        # Step every environment once.
        obs, reward, done, runOK = steps(Envs, cpu_actions)

        # Convert rewards to a (NUM_PROCESSES, 1) tensor and add them to the
        # running episode totals.
        reward = np.expand_dims(np.stack(reward), 1)
        reward = torch.from_numpy(reward).float()
        episode_rewards += reward

        # Mask is 0 when the episode ended (done) or the run failed
        # (not runOK), and 1 while the episode continues.
        masks = torch.FloatTensor(
            [[0.0] if done_ or not runOK_ else [1.0] for done_, runOK_ in zip(done, runOK)])

        # Update the last-finished-episode totals: entries of continuing
        # episodes are kept, entries of finished episodes are replaced by the
        # episode total just completed.
        final_rewards *= masks
        final_rewards += (1 - masks) * episode_rewards

        # Reset the running total of every finished episode.
        episode_rewards *= masks

        # Move the masks to the device used by the rollout buffer.
        masks = masks.to(device)

        # The newest observation becomes the current one (no frame stacking).
        obs = torch.from_numpy(obs).float()
        current_obs = obs.to(device)

        # Store this step's transition.
        rollouts.insert(current_obs, action.data, reward, masks)

    # End of rollout collection.

    # Bootstrap: estimate the value of the state reached after the last step.
    with torch.no_grad():
        next_value = actor_critic.get_value(
            rollouts.observations[-1]).detach()

    # Compute the discounted return of every stored step.
    rollouts.compute_returns(next_value)

    # Update the network, then carry the last state over to the next rollout.
    global_brain.update(rollouts)
    rollouts.after_update()

    # Progress log.
    if j % 100 == 0:
        print("finished frames {}, mean/median reward {:.1f}/{:.1f}, min/max reward {:.1f}/{:.1f}".
              format(j*NUM_PROCESSES*NUM_ADVANCED_STEP,
                     final_rewards.mean(),
                     final_rewards.median(),
                     final_rewards.min(),
                     final_rewards.max()))

    # Periodic checkpoint of the network parameters.
    if j % 1000 == 0:
        torch.save(global_brain.actor_critic.state_dict(),
                   './model/weight_'+str(j)+'.pth')

# Training finished: save the final parameters.
torch.save(global_brain.actor_critic.state_dict(), 'weight_end.pth')


  0%|          | 1/1666666 [06:19<175475:49:53, 379.03s/it]

finished frames 0, mean/median reward 0.0/0.0, min/max reward 0.0/0.0


  0%|          | 127/1666666 [20:48<16343:52:40, 35.31s/it]

finished frames 600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 225/1666666 [20:48<2748:45:19,  5.94s/it] 

finished frames 1200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 329/1666666 [20:49<463:57:10,  1.00s/it] 

finished frames 1800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 433/1666666 [20:49<79:51:55,  5.80it/s] 

finished frames 2400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 537/1666666 [20:50<15:19:27, 30.20it/s]

finished frames 3000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 641/1666666 [20:51<4:28:35, 103.38it/s]

finished frames 3600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 724/1666666 [20:51<2:49:52, 163.45it/s]

finished frames 4200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 829/1666666 [20:51<2:22:36, 194.69it/s]

finished frames 4800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 933/1666666 [20:52<2:17:48, 201.45it/s]

finished frames 5400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 1036/1666666 [20:53<2:20:52, 197.05it/s]

finished frames 6000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 1139/1666666 [20:53<2:16:59, 202.62it/s]

finished frames 6600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 1222/1666666 [20:53<2:17:43, 201.54it/s]

finished frames 7200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 1327/1666666 [20:54<2:16:55, 202.72it/s]

finished frames 7800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 1435/1666666 [20:55<2:15:43, 204.49it/s]

finished frames 8400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 1542/1666666 [20:55<2:14:14, 206.72it/s]

finished frames 9000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 1628/1666666 [20:56<2:14:52, 205.76it/s]

finished frames 9600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 1737/1666666 [20:56<2:13:51, 207.31it/s]

finished frames 10200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 1823/1666666 [20:56<2:14:52, 205.72it/s]

finished frames 10800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 1932/1666666 [20:57<2:13:47, 207.39it/s]

finished frames 11400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 2039/1666666 [20:58<2:16:25, 203.37it/s]

finished frames 12000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 2124/1666666 [20:58<2:15:10, 205.24it/s]

finished frames 12600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 2233/1666666 [20:58<2:13:27, 207.86it/s]

finished frames 13200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 2341/1666666 [20:59<2:13:16, 208.13it/s]

finished frames 13800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 2426/1666666 [20:59<2:22:36, 194.50it/s]

finished frames 14400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 2533/1666666 [21:00<2:23:46, 192.90it/s]

finished frames 15000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 2639/1666666 [21:01<2:15:27, 204.75it/s]

finished frames 15600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 2725/1666666 [21:01<2:14:58, 205.47it/s]

finished frames 16200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 2834/1666666 [21:02<2:13:29, 207.72it/s]

finished frames 16800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 2942/1666666 [21:02<2:13:01, 208.44it/s]

finished frames 17400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 3028/1666666 [21:02<2:15:29, 204.64it/s]

finished frames 18000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 3137/1666666 [21:03<2:10:41, 212.14it/s]

finished frames 18600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 3225/1666666 [21:03<2:10:30, 212.42it/s]

finished frames 19200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 3335/1666666 [21:04<2:10:12, 212.90it/s]

finished frames 19800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 3423/1666666 [21:04<2:10:29, 212.43it/s]

finished frames 20400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 3533/1666666 [21:05<2:10:11, 212.90it/s]

finished frames 21000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 3643/1666666 [21:05<2:10:25, 212.51it/s]

finished frames 21600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 3731/1666666 [21:06<2:10:08, 212.96it/s]

finished frames 22200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 3841/1666666 [21:06<2:09:31, 213.97it/s]

finished frames 22800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 3929/1666666 [21:07<2:10:21, 212.60it/s]

finished frames 23400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 4039/1666666 [21:07<2:12:24, 209.28it/s]

finished frames 24000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 4126/1666666 [21:08<2:10:50, 211.77it/s]

finished frames 24600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 4235/1666666 [21:08<2:10:27, 212.38it/s]

finished frames 25200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 4323/1666666 [21:09<2:10:58, 211.53it/s]

finished frames 25800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 4433/1666666 [21:09<2:10:19, 212.57it/s]

finished frames 26400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 4543/1666666 [21:10<2:10:13, 212.72it/s]

finished frames 27000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 4631/1666666 [21:10<2:10:47, 211.78it/s]

finished frames 27600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 4741/1666666 [21:11<2:10:30, 212.24it/s]

finished frames 28200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 4829/1666666 [21:11<2:10:16, 212.59it/s]

finished frames 28800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 4939/1666666 [21:12<2:13:20, 207.71it/s]

finished frames 29400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 5027/1666666 [21:12<2:13:27, 207.51it/s]

finished frames 30000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 5136/1666666 [21:12<2:10:52, 211.58it/s]

finished frames 30600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 5224/1666666 [21:13<2:10:45, 211.78it/s]

finished frames 31200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 5334/1666666 [21:13<2:10:10, 212.70it/s]

finished frames 31800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 5444/1666666 [21:14<2:09:40, 213.52it/s]

finished frames 32400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 5532/1666666 [21:14<2:10:03, 212.86it/s]

finished frames 33000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 5642/1666666 [21:15<2:10:08, 212.71it/s]

finished frames 33600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 5730/1666666 [21:15<2:10:26, 212.23it/s]

finished frames 34200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 5840/1666666 [21:16<2:09:54, 213.07it/s]

finished frames 34800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 5928/1666666 [21:16<2:10:13, 212.54it/s]

finished frames 35400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 6038/1666666 [21:17<2:12:35, 208.73it/s]

finished frames 36000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 6125/1666666 [21:17<2:10:27, 212.14it/s]

finished frames 36600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 6235/1666666 [21:18<2:09:50, 213.12it/s]

finished frames 37200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 6323/1666666 [21:18<2:09:36, 213.51it/s]

finished frames 37800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 6433/1666666 [21:19<2:09:43, 213.30it/s]

finished frames 38400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 6543/1666666 [21:19<2:09:27, 213.72it/s]

finished frames 39000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 6631/1666666 [21:19<2:09:59, 212.85it/s]

finished frames 39600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 6741/1666666 [21:20<2:09:44, 213.22it/s]

finished frames 40200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 6829/1666666 [21:20<2:09:52, 213.01it/s]

finished frames 40800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 6939/1666666 [21:21<2:09:54, 212.92it/s]

finished frames 41400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 7027/1666666 [21:21<2:12:33, 208.66it/s]

finished frames 42000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 7136/1666666 [21:22<2:21:51, 194.98it/s]

finished frames 42600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 7223/1666666 [21:22<2:12:55, 208.06it/s]

finished frames 43200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 7333/1666666 [21:23<2:11:03, 211.01it/s]

finished frames 43800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 7443/1666666 [21:23<2:09:52, 212.94it/s]

finished frames 44400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 7531/1666666 [21:24<2:09:50, 212.98it/s]

finished frames 45000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 7641/1666666 [21:24<2:09:19, 213.80it/s]

finished frames 45600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 7729/1666666 [21:25<2:09:41, 213.19it/s]

finished frames 46200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 7839/1666666 [21:25<2:09:56, 212.77it/s]

finished frames 46800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 7927/1666666 [21:26<2:10:10, 212.38it/s]

finished frames 47400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 8037/1666666 [21:26<2:12:20, 208.88it/s]

finished frames 48000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 8124/1666666 [21:27<2:10:53, 211.19it/s]

finished frames 48600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  0%|          | 8234/1666666 [21:27<2:09:57, 212.69it/s]

finished frames 49200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 8344/1666666 [21:28<2:09:39, 213.17it/s]

finished frames 49800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 8432/1666666 [21:28<2:09:48, 212.90it/s]

finished frames 50400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 8542/1666666 [21:29<2:09:51, 212.82it/s]

finished frames 51000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 8630/1666666 [21:29<2:09:45, 212.96it/s]

finished frames 51600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 8740/1666666 [21:29<2:09:05, 214.04it/s]

finished frames 52200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 8828/1666666 [21:30<2:09:26, 213.45it/s]

finished frames 52800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 8938/1666666 [21:30<2:09:12, 213.82it/s]

finished frames 53400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 9026/1666666 [21:31<2:12:18, 208.80it/s]

finished frames 54000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 9136/1666666 [21:31<2:09:51, 212.74it/s]

finished frames 54600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 9224/1666666 [21:32<2:09:25, 213.43it/s]

finished frames 55200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 9334/1666666 [21:32<2:09:20, 213.57it/s]

finished frames 55800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 9444/1666666 [21:33<2:08:50, 214.36it/s]

finished frames 56400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 9532/1666666 [21:33<2:21:03, 195.80it/s]

finished frames 57000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 9619/1666666 [21:34<2:12:18, 208.75it/s]

finished frames 57600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 9727/1666666 [21:34<2:12:39, 208.17it/s]

finished frames 58200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 9836/1666666 [21:35<2:09:37, 213.04it/s]

finished frames 58800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 9924/1666666 [21:35<2:09:23, 213.40it/s]

finished frames 59400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 10034/1666666 [21:36<2:11:56, 209.26it/s]

finished frames 60000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 10144/1666666 [21:36<2:09:19, 213.49it/s]

finished frames 60600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 10232/1666666 [21:37<2:09:16, 213.54it/s]

finished frames 61200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 10342/1666666 [21:37<2:09:15, 213.58it/s]

finished frames 61800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 10430/1666666 [21:37<2:08:52, 214.19it/s]

finished frames 62400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 10540/1666666 [21:38<2:08:38, 214.55it/s]

finished frames 63000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 10628/1666666 [21:38<2:08:58, 214.00it/s]

finished frames 63600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 10738/1666666 [21:39<2:08:29, 214.80it/s]

finished frames 64200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 10826/1666666 [21:39<2:08:57, 213.99it/s]

finished frames 64800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 10936/1666666 [21:40<2:08:08, 215.36it/s]

finished frames 65400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 11024/1666666 [21:40<2:11:04, 210.51it/s]

finished frames 66000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 11134/1666666 [21:41<2:08:37, 214.53it/s]

finished frames 66600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 11244/1666666 [21:41<2:07:39, 216.12it/s]

finished frames 67200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 11332/1666666 [21:42<2:07:46, 215.93it/s]

finished frames 67800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 11442/1666666 [21:42<2:07:59, 215.53it/s]

finished frames 68400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 11530/1666666 [21:43<2:07:48, 215.83it/s]

finished frames 69000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 11640/1666666 [21:43<2:07:37, 216.13it/s]

finished frames 69600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 11728/1666666 [21:44<2:07:53, 215.66it/s]

finished frames 70200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 11838/1666666 [21:44<2:07:29, 216.33it/s]

finished frames 70800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 11926/1666666 [21:45<2:19:35, 197.56it/s]

finished frames 71400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 12035/1666666 [21:45<2:12:06, 208.75it/s]

finished frames 72000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 12144/1666666 [21:46<2:09:03, 213.65it/s]

finished frames 72600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 12232/1666666 [21:46<2:08:12, 215.06it/s]

finished frames 73200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 12342/1666666 [21:46<2:08:14, 215.00it/s]

finished frames 73800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 12430/1666666 [21:47<2:07:38, 215.99it/s]

finished frames 74400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 12540/1666666 [21:47<2:06:51, 217.31it/s]

finished frames 75000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 12628/1666666 [21:48<2:06:57, 217.13it/s]

finished frames 75600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 12738/1666666 [21:48<2:06:41, 217.59it/s]

finished frames 76200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 12826/1666666 [21:49<2:07:16, 216.56it/s]

finished frames 76800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 12936/1666666 [21:49<2:07:53, 215.50it/s]

finished frames 77400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 13024/1666666 [21:50<2:11:11, 210.08it/s]

finished frames 78000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 13134/1666666 [21:50<2:07:34, 216.02it/s]

finished frames 78600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 13244/1666666 [21:51<2:07:36, 215.96it/s]

finished frames 79200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 13332/1666666 [21:51<2:07:54, 215.44it/s]

finished frames 79800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 13442/1666666 [21:52<2:07:27, 216.18it/s]

finished frames 80400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 13530/1666666 [21:52<2:07:47, 215.61it/s]

finished frames 81000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 13640/1666666 [21:52<2:07:34, 215.97it/s]

finished frames 81600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 13728/1666666 [21:53<2:07:54, 215.39it/s]

finished frames 82200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 13837/1666666 [21:53<2:12:38, 207.67it/s]

finished frames 82800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 13924/1666666 [21:54<2:12:08, 208.45it/s]

finished frames 83400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 14030/1666666 [21:54<2:14:48, 204.31it/s]

finished frames 84000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 14138/1666666 [21:55<2:12:17, 208.19it/s]

finished frames 84600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 14222/1666666 [21:55<2:16:59, 201.03it/s]

finished frames 85200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 14327/1666666 [21:56<2:24:08, 191.05it/s]

finished frames 85800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 14434/1666666 [21:56<2:25:24, 189.38it/s]

finished frames 86400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 14542/1666666 [21:57<2:13:06, 206.86it/s]

finished frames 87000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 14627/1666666 [21:57<2:12:02, 208.52it/s]

finished frames 87600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 14735/1666666 [21:58<2:10:40, 210.69it/s]

finished frames 88200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 14822/1666666 [21:58<2:10:39, 210.71it/s]

finished frames 88800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 14931/1666666 [21:59<2:10:51, 210.37it/s]

finished frames 89400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 15039/1666666 [21:59<2:14:23, 204.82it/s]

finished frames 90000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 15124/1666666 [22:00<2:12:09, 208.27it/s]

finished frames 90600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 15232/1666666 [22:00<2:10:59, 210.13it/s]

finished frames 91200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 15340/1666666 [22:01<2:11:18, 209.61it/s]

finished frames 91800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 15426/1666666 [22:01<2:11:13, 209.71it/s]

finished frames 92400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 15535/1666666 [22:02<2:10:26, 210.96it/s]

finished frames 93000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 15623/1666666 [22:02<2:10:41, 210.56it/s]

finished frames 93600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 15733/1666666 [22:03<2:10:42, 210.52it/s]

finished frames 94200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 15842/1666666 [22:03<2:10:23, 211.01it/s]

finished frames 94800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 15929/1666666 [22:04<2:10:36, 210.66it/s]

finished frames 95400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 16038/1666666 [22:04<2:13:39, 205.82it/s]

finished frames 96000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 16124/1666666 [22:04<2:11:28, 209.23it/s]

finished frames 96600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 16232/1666666 [22:05<2:10:53, 210.16it/s]

finished frames 97200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 16341/1666666 [22:05<2:10:56, 210.06it/s]

finished frames 97800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 16428/1666666 [22:06<2:10:53, 210.12it/s]

finished frames 98400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 16537/1666666 [22:06<2:10:55, 210.07it/s]

finished frames 99000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 16625/1666666 [22:07<2:10:34, 210.61it/s]

finished frames 99600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 16734/1666666 [22:07<2:14:29, 204.47it/s]

finished frames 100200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 16841/1666666 [22:08<2:11:47, 208.63it/s]

finished frames 100800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 16928/1666666 [22:08<2:10:50, 210.15it/s]

finished frames 101400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 17034/1666666 [22:09<2:14:02, 205.10it/s]

finished frames 102000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 17143/1666666 [22:09<2:10:58, 209.91it/s]

finished frames 102600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 17231/1666666 [22:10<2:10:32, 210.60it/s]

finished frames 103200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 17340/1666666 [22:10<2:10:03, 211.36it/s]

finished frames 103800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 17428/1666666 [22:11<2:10:09, 211.18it/s]

finished frames 104400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 17538/1666666 [22:11<2:10:12, 211.09it/s]

finished frames 105000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 17626/1666666 [22:12<2:10:16, 210.98it/s]

finished frames 105600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 17736/1666666 [22:12<2:10:20, 210.85it/s]

finished frames 106200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 17824/1666666 [22:13<2:10:09, 211.12it/s]

finished frames 106800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 17934/1666666 [22:13<2:09:44, 211.80it/s]

finished frames 107400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 18043/1666666 [22:14<2:12:56, 206.69it/s]

finished frames 108000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 18129/1666666 [22:14<2:11:15, 209.31it/s]

finished frames 108600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 18237/1666666 [22:15<2:10:41, 210.21it/s]

finished frames 109200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 18324/1666666 [22:15<2:10:37, 210.32it/s]

finished frames 109800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 18433/1666666 [22:15<2:10:13, 210.94it/s]

finished frames 110400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 18543/1666666 [22:16<2:10:18, 210.79it/s]

finished frames 111000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 18630/1666666 [22:16<2:10:28, 210.51it/s]

finished frames 111600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 18739/1666666 [22:17<2:10:25, 210.58it/s]

finished frames 112200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 18825/1666666 [22:17<2:10:54, 209.80it/s]

finished frames 112800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 18932/1666666 [22:18<2:10:25, 210.57it/s]

finished frames 113400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 19039/1666666 [22:18<2:13:21, 205.91it/s]

finished frames 114000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 19125/1666666 [22:19<2:10:59, 209.63it/s]

finished frames 114600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 19233/1666666 [22:19<2:15:30, 202.63it/s]

finished frames 115200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 19340/1666666 [22:20<2:11:14, 209.20it/s]

finished frames 115800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 19426/1666666 [22:20<2:10:44, 209.97it/s]

finished frames 116400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 19536/1666666 [22:21<2:09:25, 212.12it/s]

finished frames 117000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 19624/1666666 [22:21<2:10:10, 210.89it/s]

finished frames 117600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 19734/1666666 [22:22<2:09:53, 211.33it/s]

finished frames 118200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 19822/1666666 [22:22<2:10:15, 210.71it/s]

finished frames 118800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 19930/1666666 [22:23<2:10:15, 210.71it/s]

finished frames 119400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 20039/1666666 [22:23<2:12:55, 206.47it/s]

finished frames 120000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 20125/1666666 [22:24<2:11:10, 209.21it/s]

finished frames 120600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 20234/1666666 [22:24<2:10:00, 211.07it/s]

finished frames 121200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 20322/1666666 [22:25<2:10:10, 210.77it/s]

finished frames 121800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 20431/1666666 [22:25<2:10:20, 210.51it/s]

finished frames 122400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 20541/1666666 [22:26<2:09:47, 211.39it/s]

finished frames 123000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 20629/1666666 [22:26<2:10:06, 210.84it/s]

finished frames 123600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 20739/1666666 [22:27<2:09:38, 211.60it/s]

finished frames 124200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|          | 20827/1666666 [22:27<2:10:27, 210.26it/s]

finished frames 124800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|▏         | 20933/1666666 [22:27<2:11:18, 208.89it/s]

finished frames 125400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|▏         | 21039/1666666 [22:28<2:13:43, 205.10it/s]

finished frames 126000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|▏         | 21124/1666666 [22:28<2:11:33, 208.48it/s]

finished frames 126600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|▏         | 21232/1666666 [22:29<2:09:37, 211.56it/s]

finished frames 127200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|▏         | 21342/1666666 [22:29<2:09:46, 211.31it/s]

finished frames 127800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|▏         | 21429/1666666 [22:30<2:13:34, 205.29it/s]

finished frames 128400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|▏         | 21536/1666666 [22:30<2:16:09, 201.38it/s]

finished frames 129000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|▏         | 21622/1666666 [22:31<2:11:46, 208.06it/s]

finished frames 129600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|▏         | 21731/1666666 [22:31<2:10:16, 210.45it/s]

finished frames 130200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|▏         | 21840/1666666 [22:32<2:09:44, 211.28it/s]

finished frames 130800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|▏         | 21928/1666666 [22:32<2:09:46, 211.23it/s]

finished frames 131400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|▏         | 22038/1666666 [22:33<2:12:50, 206.35it/s]

finished frames 132000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|▏         | 22125/1666666 [22:33<2:10:44, 209.66it/s]

finished frames 132600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|▏         | 22235/1666666 [22:34<2:10:06, 210.65it/s]

finished frames 133200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|▏         | 22323/1666666 [22:34<2:09:19, 211.91it/s]

finished frames 133800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|▏         | 22433/1666666 [22:35<2:09:26, 211.71it/s]

finished frames 134400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|▏         | 22543/1666666 [22:35<2:09:22, 211.80it/s]

finished frames 135000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|▏         | 22631/1666666 [22:36<2:09:38, 211.35it/s]

finished frames 135600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|▏         | 22740/1666666 [22:36<2:09:42, 211.23it/s]

finished frames 136200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|▏         | 22828/1666666 [22:37<2:09:39, 211.31it/s]

finished frames 136800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|▏         | 22938/1666666 [22:37<2:09:16, 211.92it/s]

finished frames 137400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|▏         | 23024/1666666 [22:38<2:12:52, 206.16it/s]

finished frames 138000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|▏         | 23133/1666666 [22:38<2:10:15, 210.30it/s]

finished frames 138600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|▏         | 23242/1666666 [22:39<2:10:09, 210.44it/s]

finished frames 139200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|▏         | 23329/1666666 [22:39<2:09:56, 210.77it/s]

finished frames 139800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|▏         | 23438/1666666 [22:39<2:10:17, 210.19it/s]

finished frames 140400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|▏         | 23525/1666666 [22:40<2:10:24, 210.01it/s]

finished frames 141000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|▏         | 23635/1666666 [22:40<2:09:59, 210.66it/s]

finished frames 141600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|▏         | 23721/1666666 [22:41<2:10:27, 209.89it/s]

finished frames 142200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|▏         | 23829/1666666 [22:41<2:13:05, 205.73it/s]

finished frames 142800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|▏         | 23937/1666666 [22:42<2:12:57, 205.92it/s]

finished frames 143400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|▏         | 24023/1666666 [22:42<2:13:58, 204.36it/s]

finished frames 144000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|▏         | 24131/1666666 [22:43<2:10:56, 209.08it/s]

finished frames 144600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|▏         | 24240/1666666 [22:43<2:09:57, 210.62it/s]

finished frames 145200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|▏         | 24327/1666666 [22:44<2:09:56, 210.65it/s]

finished frames 145800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|▏         | 24436/1666666 [22:44<2:09:12, 211.84it/s]

finished frames 146400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|▏         | 24524/1666666 [22:45<2:09:33, 211.24it/s]

finished frames 147000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|▏         | 24633/1666666 [22:45<2:09:36, 211.16it/s]

finished frames 147600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|▏         | 24743/1666666 [22:46<2:09:10, 211.86it/s]

finished frames 148200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|▏         | 24831/1666666 [22:46<2:09:39, 211.06it/s]

finished frames 148800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  1%|▏         | 24940/1666666 [22:47<2:10:19, 209.95it/s]

finished frames 149400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 25025/1666666 [22:47<2:13:00, 205.71it/s]

finished frames 150000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 25133/1666666 [22:48<2:08:49, 212.38it/s]

finished frames 150600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 25243/1666666 [22:48<2:07:33, 214.47it/s]

finished frames 151200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 25331/1666666 [22:49<2:07:22, 214.77it/s]

finished frames 151800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 25441/1666666 [22:49<2:07:03, 215.28it/s]

finished frames 152400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 25529/1666666 [22:49<2:07:16, 214.90it/s]

finished frames 153000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 25639/1666666 [22:50<2:07:22, 214.72it/s]

finished frames 153600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 25727/1666666 [22:50<2:07:18, 214.83it/s]

finished frames 154200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 25837/1666666 [22:51<2:07:06, 215.16it/s]

finished frames 154800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 25925/1666666 [22:51<2:07:21, 214.71it/s]

finished frames 155400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 26035/1666666 [22:52<2:09:55, 210.45it/s]

finished frames 156000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 26123/1666666 [22:52<2:15:44, 201.42it/s]

finished frames 156600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 26233/1666666 [22:53<2:20:08, 195.09it/s]

finished frames 157200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 26342/1666666 [22:53<2:09:34, 210.97it/s]

finished frames 157800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 26430/1666666 [22:54<2:07:56, 213.67it/s]

finished frames 158400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 26540/1666666 [22:54<2:07:36, 214.20it/s]

finished frames 159000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 26628/1666666 [22:55<2:07:32, 214.31it/s]

finished frames 159600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 26738/1666666 [22:55<2:07:42, 214.03it/s]

finished frames 160200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 26826/1666666 [22:56<2:07:16, 214.74it/s]

finished frames 160800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 26936/1666666 [22:56<2:07:03, 215.09it/s]

finished frames 161400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 27024/1666666 [22:57<2:09:55, 210.34it/s]

finished frames 162000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 27134/1666666 [22:57<2:07:53, 213.67it/s]

finished frames 162600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 27244/1666666 [22:58<2:07:11, 214.81it/s]

finished frames 163200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 27332/1666666 [22:58<2:07:22, 214.51it/s]

finished frames 163800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 27442/1666666 [22:58<2:07:09, 214.84it/s]

finished frames 164400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 27530/1666666 [22:59<2:07:15, 214.67it/s]

finished frames 165000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 27640/1666666 [22:59<2:06:21, 216.18it/s]

finished frames 165600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 27728/1666666 [23:00<2:06:48, 215.41it/s]

finished frames 166200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 27838/1666666 [23:00<2:06:52, 215.28it/s]

finished frames 166800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 27926/1666666 [23:01<2:07:11, 214.72it/s]

finished frames 167400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 28036/1666666 [23:01<2:09:18, 211.20it/s]

finished frames 168000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 28124/1666666 [23:02<2:07:08, 214.79it/s]

finished frames 168600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 28234/1666666 [23:02<2:06:46, 215.39it/s]

finished frames 169200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 28344/1666666 [23:03<2:06:27, 215.91it/s]

finished frames 169800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 28432/1666666 [23:03<2:07:19, 214.45it/s]

finished frames 170400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 28542/1666666 [23:04<2:07:30, 214.13it/s]

finished frames 171000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 28630/1666666 [23:04<2:16:57, 199.32it/s]

finished frames 171600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 28740/1666666 [23:05<2:07:55, 213.39it/s]

finished frames 172200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 28828/1666666 [23:05<2:06:10, 216.34it/s]

finished frames 172800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 28938/1666666 [23:05<2:05:45, 217.05it/s]

finished frames 173400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 29026/1666666 [23:06<2:08:26, 212.50it/s]

finished frames 174000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 29136/1666666 [23:06<2:06:14, 216.18it/s]

finished frames 174600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 29224/1666666 [23:07<2:05:55, 216.73it/s]

finished frames 175200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 29334/1666666 [23:07<2:05:43, 217.05it/s]

finished frames 175800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 29422/1666666 [23:08<2:07:05, 214.70it/s]

finished frames 176400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 29532/1666666 [23:08<2:05:57, 216.62it/s]

finished frames 177000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 29642/1666666 [23:09<2:05:29, 217.41it/s]

finished frames 177600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 29730/1666666 [23:09<2:05:32, 217.30it/s]

finished frames 178200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 29840/1666666 [23:10<2:05:34, 217.25it/s]

finished frames 178800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 29928/1666666 [23:10<2:05:32, 217.29it/s]

finished frames 179400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 30038/1666666 [23:11<2:08:23, 212.46it/s]

finished frames 180000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 30126/1666666 [23:11<2:06:17, 215.98it/s]

finished frames 180600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 30236/1666666 [23:12<2:05:52, 216.66it/s]

finished frames 181200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 30324/1666666 [23:12<2:05:37, 217.09it/s]

finished frames 181800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 30434/1666666 [23:12<2:05:58, 216.48it/s]

finished frames 182400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 30544/1666666 [23:13<2:05:31, 217.25it/s]

finished frames 183000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 30632/1666666 [23:13<2:05:47, 216.76it/s]

finished frames 183600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 30742/1666666 [23:14<2:05:34, 217.14it/s]

finished frames 184200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 30830/1666666 [23:14<2:05:32, 217.17it/s]

finished frames 184800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 30918/1666666 [23:15<2:05:34, 217.09it/s]

finished frames 185400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 31027/1666666 [23:15<2:11:47, 206.84it/s]

finished frames 186000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 31136/1666666 [23:16<2:09:18, 210.82it/s]

finished frames 186600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 31224/1666666 [23:16<2:06:37, 215.26it/s]

finished frames 187200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 31334/1666666 [23:17<2:05:51, 216.56it/s]

finished frames 187800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 31444/1666666 [23:17<2:05:26, 217.27it/s]

finished frames 188400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 31532/1666666 [23:18<2:05:44, 216.72it/s]

finished frames 189000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 31642/1666666 [23:18<2:05:27, 217.19it/s]

finished frames 189600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 31730/1666666 [23:18<2:05:06, 217.82it/s]

finished frames 190200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 31840/1666666 [23:19<2:05:21, 217.36it/s]

finished frames 190800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 31928/1666666 [23:19<2:05:19, 217.40it/s]

finished frames 191400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 32038/1666666 [23:20<2:08:33, 211.93it/s]

finished frames 192000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 32126/1666666 [23:20<2:06:43, 214.98it/s]

finished frames 192600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 32236/1666666 [23:21<2:05:46, 216.57it/s]

finished frames 193200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 32324/1666666 [23:21<2:06:00, 216.16it/s]

finished frames 193800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 32434/1666666 [23:22<2:05:55, 216.31it/s]

finished frames 194400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 32544/1666666 [23:22<2:05:30, 217.01it/s]

finished frames 195000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 32632/1666666 [23:23<2:05:41, 216.66it/s]

finished frames 195600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 32742/1666666 [23:23<2:05:24, 217.14it/s]

finished frames 196200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 32830/1666666 [23:24<2:05:37, 216.77it/s]

finished frames 196800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 32940/1666666 [23:24<2:05:43, 216.58it/s]

finished frames 197400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 33027/1666666 [23:25<2:10:30, 208.64it/s]

finished frames 198000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 33136/1666666 [23:25<2:09:06, 210.87it/s]

finished frames 198600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 33224/1666666 [23:25<2:09:10, 210.76it/s]

finished frames 199200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 33334/1666666 [23:26<2:09:02, 210.96it/s]

finished frames 199800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 33422/1666666 [23:26<2:08:33, 211.75it/s]

finished frames 200400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 33532/1666666 [23:27<2:12:28, 205.46it/s]

finished frames 201000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 33641/1666666 [23:27<2:09:10, 210.70it/s]

finished frames 201600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 33729/1666666 [23:28<2:08:28, 211.83it/s]

finished frames 202200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 33839/1666666 [23:28<2:08:24, 211.93it/s]

finished frames 202800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 33927/1666666 [23:29<2:08:36, 211.59it/s]

finished frames 203400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 34037/1666666 [23:29<2:11:31, 206.89it/s]

finished frames 204000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 34124/1666666 [23:30<2:09:41, 209.81it/s]

finished frames 204600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 34234/1666666 [23:30<2:08:39, 211.47it/s]

finished frames 205200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 34322/1666666 [23:31<2:08:49, 211.19it/s]

finished frames 205800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 34432/1666666 [23:31<2:08:37, 211.51it/s]

finished frames 206400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 34542/1666666 [23:32<2:08:52, 211.08it/s]

finished frames 207000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 34630/1666666 [23:32<2:08:35, 211.54it/s]

finished frames 207600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 34740/1666666 [23:33<2:08:22, 211.87it/s]

finished frames 208200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 34828/1666666 [23:33<2:08:26, 211.76it/s]

finished frames 208800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 34938/1666666 [23:34<2:08:27, 211.70it/s]

finished frames 209400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 35025/1666666 [23:34<2:11:32, 206.74it/s]

finished frames 210000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 35134/1666666 [23:35<2:08:45, 211.18it/s]

finished frames 210600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 35222/1666666 [23:35<2:08:47, 211.12it/s]

finished frames 211200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 35332/1666666 [23:35<2:08:22, 211.78it/s]

finished frames 211800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 35442/1666666 [23:36<2:08:19, 211.86it/s]

finished frames 212400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 35530/1666666 [23:36<2:08:28, 211.61it/s]

finished frames 213000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 35640/1666666 [23:37<2:08:36, 211.36it/s]

finished frames 213600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 35728/1666666 [23:37<2:08:33, 211.45it/s]

finished frames 214200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 35816/1666666 [23:38<2:08:51, 210.93it/s]

finished frames 214800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 35925/1666666 [23:38<2:10:49, 207.74it/s]

finished frames 215400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 36033/1666666 [23:39<2:12:27, 205.18it/s]

finished frames 216000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 36142/1666666 [23:39<2:09:17, 210.20it/s]

finished frames 216600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 36230/1666666 [23:40<2:08:31, 211.44it/s]

finished frames 217200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 36340/1666666 [23:40<2:08:19, 211.75it/s]

finished frames 217800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 36428/1666666 [23:41<2:08:27, 211.51it/s]

finished frames 218400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 36538/1666666 [23:41<2:08:16, 211.79it/s]

finished frames 219000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 36626/1666666 [23:42<2:08:17, 211.75it/s]

finished frames 219600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 36736/1666666 [23:42<2:08:12, 211.89it/s]

finished frames 220200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 36824/1666666 [23:43<2:08:19, 211.69it/s]

finished frames 220800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 36934/1666666 [23:43<2:08:10, 211.91it/s]

finished frames 221400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 37022/1666666 [23:44<2:12:43, 204.64it/s]

finished frames 222000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 37128/1666666 [23:44<2:10:23, 208.30it/s]

finished frames 222600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 37237/1666666 [23:45<2:08:57, 210.59it/s]

finished frames 223200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 37325/1666666 [23:45<2:08:40, 211.03it/s]

finished frames 223800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 37435/1666666 [23:46<2:08:41, 211.00it/s]

finished frames 224400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 37523/1666666 [23:46<2:08:39, 211.05it/s]

finished frames 225000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 37633/1666666 [23:46<2:08:55, 210.59it/s]

finished frames 225600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 37740/1666666 [23:47<2:09:53, 209.01it/s]

finished frames 226200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 37827/1666666 [23:47<2:09:08, 210.23it/s]

finished frames 226800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 37937/1666666 [23:48<2:08:18, 211.56it/s]

finished frames 227400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 38024/1666666 [23:48<2:11:33, 206.32it/s]

finished frames 228000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 38132/1666666 [23:49<2:09:12, 210.06it/s]

finished frames 228600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 38241/1666666 [23:49<2:13:03, 203.96it/s]

finished frames 229200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 38328/1666666 [23:50<2:09:38, 209.34it/s]

finished frames 229800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 38437/1666666 [23:50<2:08:17, 211.52it/s]

finished frames 230400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 38525/1666666 [23:51<2:08:47, 210.69it/s]

finished frames 231000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 38635/1666666 [23:51<2:08:35, 211.01it/s]

finished frames 231600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 38723/1666666 [23:52<2:08:41, 210.83it/s]

finished frames 232200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 38832/1666666 [23:52<2:08:39, 210.87it/s]

finished frames 232800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 38942/1666666 [23:53<2:08:10, 211.65it/s]

finished frames 233400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 39029/1666666 [23:53<2:12:27, 204.80it/s]

finished frames 234000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 39136/1666666 [23:54<2:12:13, 205.13it/s]

finished frames 234600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 39241/1666666 [23:54<2:11:00, 207.05it/s]

finished frames 235200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 39325/1666666 [23:55<2:10:02, 208.56it/s]

finished frames 235800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 39431/1666666 [23:55<2:09:44, 209.04it/s]

finished frames 236400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 39537/1666666 [23:56<2:12:20, 204.91it/s]

finished frames 237000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 39643/1666666 [23:56<2:10:20, 208.03it/s]

finished frames 237600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 39728/1666666 [23:57<2:09:40, 209.12it/s]

finished frames 238200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 39835/1666666 [23:57<2:09:16, 209.74it/s]

finished frames 238800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 39922/1666666 [23:57<2:08:56, 210.26it/s]

finished frames 239400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 40029/1666666 [23:58<2:12:33, 204.51it/s]

finished frames 240000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 40137/1666666 [23:58<2:09:39, 209.07it/s]

finished frames 240600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 40222/1666666 [23:59<2:09:33, 209.24it/s]

finished frames 241200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 40329/1666666 [23:59<2:09:14, 209.73it/s]

finished frames 241800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 40437/1666666 [24:00<2:09:03, 210.00it/s]

finished frames 242400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 40524/1666666 [24:00<2:08:33, 210.81it/s]

finished frames 243000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 40632/1666666 [24:01<2:09:18, 209.59it/s]

finished frames 243600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 40741/1666666 [24:01<2:08:49, 210.35it/s]

finished frames 244200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 40829/1666666 [24:02<2:08:31, 210.83it/s]

finished frames 244800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 40936/1666666 [24:02<2:09:26, 209.33it/s]

finished frames 245400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 41041/1666666 [24:03<2:12:34, 204.36it/s]

finished frames 246000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 41125/1666666 [24:03<2:11:21, 206.26it/s]

finished frames 246600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 41231/1666666 [24:04<2:09:57, 208.44it/s]

finished frames 247200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 41337/1666666 [24:04<2:09:50, 208.64it/s]

finished frames 247800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 41442/1666666 [24:05<2:09:29, 209.18it/s]

finished frames 248400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 41527/1666666 [24:05<2:09:49, 208.62it/s]

finished frames 249000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  2%|▏         | 41632/1666666 [24:06<2:10:09, 208.08it/s]

finished frames 249600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 41737/1666666 [24:06<2:11:14, 206.35it/s]

finished frames 250200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 41823/1666666 [24:07<2:10:10, 208.04it/s]

finished frames 250800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 41928/1666666 [24:07<2:10:22, 207.70it/s]

finished frames 251400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 42035/1666666 [24:08<2:12:06, 204.96it/s]

finished frames 252000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 42144/1666666 [24:08<2:07:46, 211.91it/s]

finished frames 252600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 42232/1666666 [24:09<2:06:51, 213.41it/s]

finished frames 253200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 42342/1666666 [24:09<2:06:09, 214.60it/s]

finished frames 253800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 42430/1666666 [24:09<2:06:41, 213.67it/s]

finished frames 254400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 42540/1666666 [24:10<2:06:59, 213.17it/s]

finished frames 255000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 42628/1666666 [24:10<2:05:45, 215.23it/s]

finished frames 255600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 42738/1666666 [24:11<2:06:47, 213.45it/s]

finished frames 256200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 42826/1666666 [24:11<2:17:52, 196.30it/s]

finished frames 256800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 42936/1666666 [24:12<2:08:05, 211.28it/s]

finished frames 257400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 43024/1666666 [24:12<2:09:39, 208.70it/s]

finished frames 258000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 43133/1666666 [24:13<2:07:13, 212.67it/s]

finished frames 258600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 43243/1666666 [24:13<2:07:05, 212.89it/s]

finished frames 259200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 43331/1666666 [24:14<2:06:55, 213.17it/s]

finished frames 259800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 43441/1666666 [24:14<2:06:05, 214.55it/s]

finished frames 260400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 43529/1666666 [24:15<2:06:40, 213.57it/s]

finished frames 261000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 43639/1666666 [24:15<2:06:40, 213.54it/s]

finished frames 261600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 43727/1666666 [24:16<2:06:20, 214.10it/s]

finished frames 262200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 43837/1666666 [24:16<2:06:04, 214.52it/s]

finished frames 262800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 43925/1666666 [24:17<2:06:12, 214.28it/s]

finished frames 263400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 44035/1666666 [24:17<2:09:05, 209.50it/s]

finished frames 264000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 44144/1666666 [24:18<2:06:29, 213.80it/s]

finished frames 264600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 44232/1666666 [24:18<2:05:56, 214.71it/s]

finished frames 265200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 44342/1666666 [24:19<2:05:33, 215.36it/s]

finished frames 265800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 44430/1666666 [24:19<2:05:49, 214.88it/s]

finished frames 266400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 44540/1666666 [24:19<2:06:02, 214.49it/s]

finished frames 267000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 44628/1666666 [24:20<2:05:56, 214.66it/s]

finished frames 267600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 44738/1666666 [24:20<2:06:09, 214.26it/s]

finished frames 268200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 44826/1666666 [24:21<2:06:09, 214.26it/s]

finished frames 268800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 44936/1666666 [24:21<2:05:56, 214.62it/s]

finished frames 269400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 45024/1666666 [24:22<2:08:59, 209.53it/s]

finished frames 270000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 45134/1666666 [24:22<2:06:53, 212.97it/s]

finished frames 270600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 45244/1666666 [24:23<2:09:47, 208.22it/s]

finished frames 271200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 45331/1666666 [24:23<2:07:33, 211.85it/s]

finished frames 271800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 45441/1666666 [24:24<2:06:00, 214.42it/s]

finished frames 272400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 45529/1666666 [24:24<2:05:58, 214.48it/s]

finished frames 273000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 45639/1666666 [24:25<2:06:11, 214.09it/s]

finished frames 273600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 45727/1666666 [24:25<2:06:11, 214.08it/s]

finished frames 274200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 45837/1666666 [24:26<2:05:24, 215.40it/s]

finished frames 274800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 45925/1666666 [24:26<2:05:02, 216.03it/s]

finished frames 275400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 46035/1666666 [24:26<2:08:24, 210.34it/s]

finished frames 276000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 46123/1666666 [24:27<2:05:43, 214.82it/s]

finished frames 276600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 46233/1666666 [24:27<2:04:54, 216.21it/s]

finished frames 277200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 46343/1666666 [24:28<2:05:24, 215.35it/s]

finished frames 277800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 46431/1666666 [24:28<2:05:12, 215.67it/s]

finished frames 278400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 46541/1666666 [24:29<2:04:54, 216.19it/s]

finished frames 279000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 46629/1666666 [24:29<2:05:14, 215.58it/s]

finished frames 279600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 46739/1666666 [24:30<2:05:01, 215.95it/s]

finished frames 280200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 46827/1666666 [24:30<2:05:16, 215.51it/s]

finished frames 280800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 46937/1666666 [24:31<2:05:21, 215.35it/s]

finished frames 281400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 47025/1666666 [24:31<2:07:55, 211.01it/s]

finished frames 282000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 47135/1666666 [24:32<2:05:38, 214.84it/s]

finished frames 282600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 47223/1666666 [24:32<2:05:24, 215.22it/s]

finished frames 283200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 47333/1666666 [24:33<2:05:20, 215.32it/s]

finished frames 283800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 47443/1666666 [24:33<2:05:17, 215.41it/s]

finished frames 284400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 47531/1666666 [24:33<2:05:16, 215.42it/s]

finished frames 285000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 47641/1666666 [24:34<2:10:28, 206.81it/s]

finished frames 285600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 47729/1666666 [24:34<2:13:55, 201.46it/s]

finished frames 286200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 47839/1666666 [24:35<2:06:41, 212.97it/s]

finished frames 286800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 47927/1666666 [24:35<2:05:29, 214.99it/s]

finished frames 287400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 48037/1666666 [24:36<2:08:13, 210.38it/s]

finished frames 288000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 48125/1666666 [24:36<2:06:00, 214.07it/s]

finished frames 288600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 48235/1666666 [24:37<2:05:25, 215.06it/s]

finished frames 289200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 48323/1666666 [24:37<2:05:24, 215.07it/s]

finished frames 289800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 48433/1666666 [24:38<2:04:58, 215.81it/s]

finished frames 290400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 48543/1666666 [24:38<2:04:42, 216.25it/s]

finished frames 291000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 48631/1666666 [24:39<2:04:19, 216.92it/s]

finished frames 291600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 48741/1666666 [24:39<2:04:31, 216.54it/s]

finished frames 292200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 48829/1666666 [24:40<2:04:43, 216.19it/s]

finished frames 292800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 48939/1666666 [24:40<2:04:28, 216.60it/s]

finished frames 293400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 49027/1666666 [24:40<2:07:22, 211.67it/s]

finished frames 294000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 49137/1666666 [24:41<2:05:05, 215.51it/s]

finished frames 294600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 49225/1666666 [24:41<2:04:40, 216.23it/s]

finished frames 295200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 49335/1666666 [24:42<2:06:03, 213.83it/s]

finished frames 295800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 49423/1666666 [24:42<2:06:52, 212.45it/s]

finished frames 296400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 49533/1666666 [24:43<2:06:53, 212.39it/s]

finished frames 297000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 49643/1666666 [24:43<2:06:58, 212.26it/s]

finished frames 297600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 49731/1666666 [24:44<2:07:03, 212.10it/s]

finished frames 298200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 49841/1666666 [24:44<2:06:45, 212.58it/s]

finished frames 298800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 49929/1666666 [24:45<2:06:48, 212.50it/s]

finished frames 299400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 50037/1666666 [24:45<2:14:51, 199.79it/s]

finished frames 300000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 50123/1666666 [24:46<2:24:21, 186.63it/s]

finished frames 300600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 50232/1666666 [24:46<2:09:52, 207.43it/s]

finished frames 301200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 50339/1666666 [24:47<2:11:03, 205.54it/s]

finished frames 301800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 50426/1666666 [24:47<2:08:07, 210.25it/s]

finished frames 302400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 50536/1666666 [24:48<2:06:41, 212.61it/s]

finished frames 303000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 50624/1666666 [24:48<2:06:29, 212.94it/s]

finished frames 303600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 50734/1666666 [24:49<2:08:02, 210.33it/s]

finished frames 304200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 50822/1666666 [24:49<2:07:59, 210.40it/s]

finished frames 304800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 50931/1666666 [24:50<2:08:03, 210.28it/s]

finished frames 305400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 51038/1666666 [24:50<2:11:26, 204.85it/s]

finished frames 306000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 51124/1666666 [24:51<2:08:31, 209.49it/s]

finished frames 306600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 51232/1666666 [24:51<2:07:49, 210.62it/s]

finished frames 307200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 51339/1666666 [24:52<2:08:21, 209.75it/s]

finished frames 307800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 51424/1666666 [24:52<2:08:34, 209.37it/s]

finished frames 308400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 51529/1666666 [24:52<2:08:51, 208.91it/s]

finished frames 309000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 51635/1666666 [24:53<2:08:41, 209.17it/s]

finished frames 309600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 51741/1666666 [24:53<2:09:00, 208.63it/s]

finished frames 310200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 51826/1666666 [24:54<2:08:48, 208.94it/s]

finished frames 310800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 51931/1666666 [24:54<2:09:04, 208.51it/s]

finished frames 311400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 52037/1666666 [24:55<2:12:02, 203.82it/s]

finished frames 312000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 52122/1666666 [24:55<2:09:30, 207.79it/s]

finished frames 312600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 52229/1666666 [24:56<2:08:39, 209.13it/s]

finished frames 313200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 52337/1666666 [24:56<2:08:20, 209.64it/s]

finished frames 313800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 52423/1666666 [24:57<2:08:17, 209.72it/s]

finished frames 314400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 52529/1666666 [24:57<2:08:27, 209.44it/s]

finished frames 315000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 52636/1666666 [24:58<2:08:18, 209.65it/s]

finished frames 315600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 52722/1666666 [24:58<2:08:09, 209.90it/s]

finished frames 316200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 52830/1666666 [24:59<2:08:16, 209.69it/s]

finished frames 316800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 52938/1666666 [24:59<2:08:14, 209.73it/s]

finished frames 317400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 53022/1666666 [25:00<2:11:23, 204.69it/s]

finished frames 318000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 53129/1666666 [25:00<2:08:28, 209.32it/s]

finished frames 318600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 53238/1666666 [25:01<2:08:07, 209.88it/s]

finished frames 319200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 53323/1666666 [25:01<2:08:30, 209.24it/s]

finished frames 319800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 53429/1666666 [25:02<2:08:14, 209.65it/s]

finished frames 320400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 53535/1666666 [25:02<2:09:04, 208.28it/s]

finished frames 321000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 53643/1666666 [25:03<2:08:13, 209.66it/s]

finished frames 321600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 53727/1666666 [25:03<2:08:39, 208.95it/s]

finished frames 322200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 53834/1666666 [25:03<2:08:02, 209.94it/s]

finished frames 322800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 53942/1666666 [25:04<2:07:33, 210.71it/s]

finished frames 323400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 54030/1666666 [25:04<2:08:00, 209.96it/s]

finished frames 324000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 54140/1666666 [25:05<2:04:44, 215.46it/s]

finished frames 324600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 54228/1666666 [25:05<2:04:18, 216.18it/s]

finished frames 325200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 54338/1666666 [25:06<2:04:24, 215.99it/s]

finished frames 325800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 54426/1666666 [25:06<2:04:16, 216.22it/s]

finished frames 326400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 54536/1666666 [25:07<2:04:54, 215.11it/s]

finished frames 327000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 54624/1666666 [25:07<2:04:53, 215.13it/s]

finished frames 327600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 54733/1666666 [25:08<2:13:50, 200.73it/s]

finished frames 328200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 54821/1666666 [25:08<2:17:44, 195.02it/s]

finished frames 328800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 54931/1666666 [25:09<2:06:46, 211.88it/s]

finished frames 329400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 55041/1666666 [25:09<2:07:45, 210.24it/s]

finished frames 330000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 55129/1666666 [25:10<2:05:47, 213.53it/s]

finished frames 330600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 55239/1666666 [25:10<2:05:08, 214.61it/s]

finished frames 331200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 55327/1666666 [25:11<2:05:25, 214.12it/s]

finished frames 331800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 55437/1666666 [25:11<2:04:46, 215.21it/s]

finished frames 332400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 55525/1666666 [25:11<2:05:03, 214.71it/s]

finished frames 333000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 55635/1666666 [25:12<2:04:46, 215.21it/s]

finished frames 333600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 55723/1666666 [25:12<2:05:05, 214.64it/s]

finished frames 334200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 55833/1666666 [25:13<2:04:43, 215.25it/s]

finished frames 334800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 55943/1666666 [25:13<2:04:43, 215.23it/s]

finished frames 335400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 56031/1666666 [25:14<2:07:39, 210.27it/s]

finished frames 336000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 56141/1666666 [25:14<2:05:03, 214.64it/s]

finished frames 336600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 56229/1666666 [25:15<2:04:53, 214.91it/s]

finished frames 337200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 56339/1666666 [25:15<2:04:43, 215.18it/s]

finished frames 337800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 56427/1666666 [25:16<2:04:48, 215.04it/s]

finished frames 338400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 56537/1666666 [25:16<2:05:02, 214.60it/s]

finished frames 339000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 56625/1666666 [25:17<2:04:46, 215.06it/s]

finished frames 339600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 56735/1666666 [25:17<2:04:49, 214.97it/s]

finished frames 340200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 56823/1666666 [25:18<2:04:47, 215.02it/s]

finished frames 340800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 56933/1666666 [25:18<2:04:57, 214.71it/s]

finished frames 341400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 57021/1666666 [25:18<2:07:55, 209.72it/s]

finished frames 342000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 57131/1666666 [25:19<2:12:56, 201.78it/s]

finished frames 342600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 57218/1666666 [25:19<2:17:39, 194.87it/s]

finished frames 343200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 57328/1666666 [25:20<2:06:34, 211.90it/s]

finished frames 343800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 57438/1666666 [25:20<2:05:14, 214.16it/s]

finished frames 344400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 57526/1666666 [25:21<2:05:00, 214.53it/s]

finished frames 345000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 57636/1666666 [25:21<2:04:39, 215.13it/s]

finished frames 345600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 57724/1666666 [25:22<2:04:47, 214.89it/s]

finished frames 346200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 57834/1666666 [25:22<2:04:38, 215.13it/s]

finished frames 346800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 57944/1666666 [25:23<2:04:06, 216.03it/s]

finished frames 347400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 58032/1666666 [25:23<2:05:37, 213.43it/s]

finished frames 348000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 58142/1666666 [25:24<2:03:27, 217.16it/s]

finished frames 348600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  3%|▎         | 58230/1666666 [25:24<2:03:28, 217.09it/s]

finished frames 349200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▎         | 58340/1666666 [25:25<2:02:48, 218.28it/s]

finished frames 349800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▎         | 58428/1666666 [25:25<2:03:36, 216.86it/s]

finished frames 350400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▎         | 58538/1666666 [25:26<2:04:14, 215.72it/s]

finished frames 351000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▎         | 58626/1666666 [25:26<2:04:32, 215.19it/s]

finished frames 351600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▎         | 58736/1666666 [25:26<2:04:28, 215.29it/s]

finished frames 352200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▎         | 58824/1666666 [25:27<2:04:14, 215.69it/s]

finished frames 352800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▎         | 58934/1666666 [25:27<2:04:23, 215.42it/s]

finished frames 353400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▎         | 59022/1666666 [25:28<2:08:50, 207.97it/s]

finished frames 354000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▎         | 59132/1666666 [25:28<2:04:19, 215.49it/s]

finished frames 354600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▎         | 59242/1666666 [25:29<2:03:15, 217.35it/s]

finished frames 355200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▎         | 59332/1666666 [25:29<2:02:06, 219.39it/s]

finished frames 355800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▎         | 59443/1666666 [25:30<2:02:20, 218.97it/s]

finished frames 356400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▎         | 59531/1666666 [25:30<2:13:20, 200.88it/s]

finished frames 357000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▎         | 59621/1666666 [25:31<2:04:43, 214.75it/s]

finished frames 357600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▎         | 59731/1666666 [25:31<2:05:52, 212.78it/s]

finished frames 358200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▎         | 59841/1666666 [25:32<2:03:27, 216.93it/s]

finished frames 358800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▎         | 59930/1666666 [25:32<2:02:45, 218.14it/s]

finished frames 359400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▎         | 60040/1666666 [25:33<2:05:51, 212.76it/s]

finished frames 360000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▎         | 60128/1666666 [25:33<2:03:52, 216.16it/s]

finished frames 360600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▎         | 60239/1666666 [25:34<2:02:59, 217.69it/s]

finished frames 361200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▎         | 60327/1666666 [25:34<2:03:12, 217.29it/s]

finished frames 361800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▎         | 60439/1666666 [25:34<2:02:13, 219.01it/s]

finished frames 362400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▎         | 60529/1666666 [25:35<2:01:58, 219.47it/s]

finished frames 363000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▎         | 60640/1666666 [25:35<2:02:44, 218.08it/s]

finished frames 363600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▎         | 60728/1666666 [25:36<2:02:52, 217.82it/s]

finished frames 364200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▎         | 60838/1666666 [25:36<2:02:31, 218.43it/s]

finished frames 364800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▎         | 60926/1666666 [25:37<2:02:50, 217.87it/s]

finished frames 365400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▎         | 61036/1666666 [25:37<2:05:44, 212.82it/s]

finished frames 366000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▎         | 61124/1666666 [25:38<2:03:44, 216.24it/s]

finished frames 366600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▎         | 61237/1666666 [25:38<2:01:59, 219.35it/s]

finished frames 367200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▎         | 61325/1666666 [25:38<2:02:12, 218.92it/s]

finished frames 367800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▎         | 61437/1666666 [25:39<2:01:36, 220.01it/s]

finished frames 368400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▎         | 61527/1666666 [25:39<2:01:36, 219.98it/s]

finished frames 369000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▎         | 61638/1666666 [25:40<2:02:15, 218.80it/s]

finished frames 369600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▎         | 61726/1666666 [25:40<2:02:48, 217.81it/s]

finished frames 370200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▎         | 61836/1666666 [25:41<2:03:23, 216.78it/s]

finished frames 370800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▎         | 61924/1666666 [25:41<2:03:00, 217.42it/s]

finished frames 371400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▎         | 62033/1666666 [25:42<2:09:36, 206.34it/s]

finished frames 372000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▎         | 62144/1666666 [25:42<2:03:59, 215.69it/s]

finished frames 372600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▎         | 62232/1666666 [25:43<2:02:56, 217.50it/s]

finished frames 373200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▎         | 62342/1666666 [25:43<2:02:23, 218.48it/s]

finished frames 373800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▎         | 62430/1666666 [25:44<2:02:42, 217.89it/s]

finished frames 374400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 62540/1666666 [25:44<2:02:44, 217.81it/s]

finished frames 375000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 62628/1666666 [25:45<2:02:48, 217.70it/s]

finished frames 375600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 62738/1666666 [25:45<2:02:54, 217.49it/s]

finished frames 376200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 62826/1666666 [25:45<2:03:09, 217.05it/s]

finished frames 376800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 62937/1666666 [25:46<2:02:48, 217.64it/s]

finished frames 377400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 63025/1666666 [25:46<2:05:23, 213.15it/s]

finished frames 378000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 63135/1666666 [25:47<2:03:42, 216.03it/s]

finished frames 378600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 63224/1666666 [25:47<2:02:30, 218.13it/s]

finished frames 379200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 63335/1666666 [25:48<2:02:14, 218.59it/s]

finished frames 379800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 63423/1666666 [25:48<2:02:58, 217.28it/s]

finished frames 380400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 63533/1666666 [25:49<2:02:48, 217.56it/s]

finished frames 381000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 63644/1666666 [25:49<2:02:31, 218.07it/s]

finished frames 381600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 63732/1666666 [25:50<2:03:00, 217.19it/s]

finished frames 382200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 63842/1666666 [25:50<2:02:33, 217.98it/s]

finished frames 382800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 63930/1666666 [25:51<2:02:48, 217.51it/s]

finished frames 383400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 64040/1666666 [25:51<2:05:14, 213.28it/s]

finished frames 384000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 64128/1666666 [25:51<2:03:25, 216.41it/s]

finished frames 384600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 64238/1666666 [25:52<2:02:36, 217.83it/s]

finished frames 385200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 64326/1666666 [25:52<2:02:26, 218.12it/s]

finished frames 385800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 64437/1666666 [25:53<2:02:16, 218.39it/s]

finished frames 386400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 64525/1666666 [25:53<2:03:20, 216.48it/s]

finished frames 387000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 64634/1666666 [25:54<2:06:52, 210.44it/s]

finished frames 387600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 64744/1666666 [25:54<2:04:37, 214.22it/s]

finished frames 388200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 64832/1666666 [25:55<2:04:02, 215.23it/s]

finished frames 388800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 64942/1666666 [25:55<2:04:52, 213.76it/s]

finished frames 389400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 65029/1666666 [25:56<2:12:09, 201.99it/s]

finished frames 390000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 65138/1666666 [25:56<2:06:46, 210.54it/s]

finished frames 390600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 65226/1666666 [25:57<2:05:54, 211.99it/s]

finished frames 391200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 65336/1666666 [25:57<2:05:12, 213.14it/s]

finished frames 391800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 65424/1666666 [25:58<2:04:53, 213.69it/s]

finished frames 392400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 65534/1666666 [25:58<2:03:42, 215.70it/s]

finished frames 393000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 65644/1666666 [25:59<2:03:32, 215.98it/s]

finished frames 393600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 65732/1666666 [25:59<2:03:58, 215.22it/s]

finished frames 394200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 65842/1666666 [25:59<2:03:55, 215.30it/s]

finished frames 394800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 65930/1666666 [26:00<2:03:56, 215.27it/s]

finished frames 395400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 66040/1666666 [26:00<2:06:41, 210.57it/s]

finished frames 396000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 66128/1666666 [26:01<2:04:50, 213.68it/s]

finished frames 396600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 66238/1666666 [26:01<2:04:18, 214.58it/s]

finished frames 397200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 66326/1666666 [26:02<2:04:04, 214.97it/s]

finished frames 397800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 66436/1666666 [26:02<2:04:03, 214.98it/s]

finished frames 398400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 66524/1666666 [26:03<2:04:04, 214.95it/s]

finished frames 399000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 66634/1666666 [26:03<2:04:01, 215.02it/s]

finished frames 399600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 66744/1666666 [26:04<2:03:41, 215.58it/s]

finished frames 400200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 66832/1666666 [26:04<2:11:43, 202.42it/s]

finished frames 400800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 66942/1666666 [26:05<2:04:52, 213.52it/s]

finished frames 401400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 67030/1666666 [26:05<2:07:11, 209.61it/s]

finished frames 402000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 67140/1666666 [26:06<2:04:19, 214.43it/s]

finished frames 402600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 67228/1666666 [26:06<2:03:51, 215.22it/s]

finished frames 403200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 67338/1666666 [26:07<2:03:30, 215.82it/s]

finished frames 403800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 67426/1666666 [26:07<2:03:50, 215.22it/s]

finished frames 404400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 67536/1666666 [26:07<2:03:30, 215.80it/s]

finished frames 405000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 67624/1666666 [26:08<2:04:44, 213.64it/s]

finished frames 405600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 67734/1666666 [26:08<2:03:38, 215.55it/s]

finished frames 406200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 67844/1666666 [26:09<2:03:35, 215.60it/s]

finished frames 406800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 67932/1666666 [26:09<2:03:50, 215.15it/s]

finished frames 407400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 68042/1666666 [26:10<2:06:15, 211.03it/s]

finished frames 408000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 68130/1666666 [26:10<2:04:01, 214.81it/s]

finished frames 408600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 68240/1666666 [26:11<2:03:30, 215.68it/s]

finished frames 409200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 68328/1666666 [26:11<2:03:35, 215.54it/s]

finished frames 409800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 68438/1666666 [26:12<2:03:28, 215.73it/s]

finished frames 410400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 68526/1666666 [26:12<2:03:26, 215.79it/s]

finished frames 411000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 68636/1666666 [26:13<2:03:48, 215.13it/s]

finished frames 411600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 68724/1666666 [26:13<2:03:40, 215.34it/s]

finished frames 412200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 68834/1666666 [26:13<2:03:33, 215.53it/s]

finished frames 412800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 68944/1666666 [26:14<2:03:23, 215.81it/s]

finished frames 413400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 69032/1666666 [26:14<2:06:15, 210.90it/s]

finished frames 414000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 69142/1666666 [26:15<2:03:41, 215.25it/s]

finished frames 414600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 69230/1666666 [26:15<2:14:37, 197.77it/s]

finished frames 415200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 69340/1666666 [26:16<2:06:04, 211.15it/s]

finished frames 415800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 69428/1666666 [26:16<2:03:49, 214.98it/s]

finished frames 416400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 69538/1666666 [26:17<2:03:42, 215.18it/s]

finished frames 417000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 69626/1666666 [26:17<2:03:26, 215.62it/s]

finished frames 417600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 69736/1666666 [26:18<2:03:01, 216.34it/s]

finished frames 418200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 69824/1666666 [26:18<2:03:19, 215.79it/s]

finished frames 418800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 69934/1666666 [26:19<2:02:42, 216.87it/s]

finished frames 419400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 70022/1666666 [26:19<2:09:23, 205.67it/s]

finished frames 420000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 70129/1666666 [26:20<2:07:20, 208.97it/s]

finished frames 420600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 70237/1666666 [26:20<2:06:30, 210.32it/s]

finished frames 421200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 70324/1666666 [26:21<2:05:48, 211.48it/s]

finished frames 421800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 70434/1666666 [26:21<2:04:05, 214.39it/s]

finished frames 422400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 70544/1666666 [26:22<2:03:50, 214.82it/s]

finished frames 423000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 70632/1666666 [26:22<2:03:58, 214.57it/s]

finished frames 423600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 70742/1666666 [26:22<2:03:25, 215.50it/s]

finished frames 424200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 70830/1666666 [26:23<2:03:06, 216.04it/s]

finished frames 424800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 70940/1666666 [26:23<2:02:45, 216.64it/s]

finished frames 425400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 71028/1666666 [26:24<2:05:59, 211.08it/s]

finished frames 426000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 71138/1666666 [26:24<2:03:23, 215.52it/s]

finished frames 426600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 71226/1666666 [26:25<2:03:02, 216.10it/s]

finished frames 427200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 71336/1666666 [26:25<2:02:46, 216.56it/s]

finished frames 427800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 71424/1666666 [26:26<2:02:56, 216.25it/s]

finished frames 428400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 71534/1666666 [26:26<2:02:57, 216.21it/s]

finished frames 429000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 71644/1666666 [26:27<2:02:36, 216.82it/s]

finished frames 429600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 71732/1666666 [26:27<2:02:50, 216.40it/s]

finished frames 430200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 71842/1666666 [26:28<2:05:49, 211.24it/s]

finished frames 430800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 71930/1666666 [26:28<2:03:34, 215.08it/s]

finished frames 431400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 72040/1666666 [26:29<2:05:30, 211.76it/s]

finished frames 432000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 72128/1666666 [26:29<2:03:34, 215.05it/s]

finished frames 432600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 72238/1666666 [26:29<2:03:30, 215.16it/s]

finished frames 433200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 72326/1666666 [26:30<2:02:55, 216.15it/s]

finished frames 433800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 72436/1666666 [26:30<2:03:04, 215.90it/s]

finished frames 434400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 72524/1666666 [26:31<2:03:13, 215.61it/s]

finished frames 435000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 72634/1666666 [26:31<2:03:20, 215.38it/s]

finished frames 435600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 72744/1666666 [26:32<2:03:09, 215.69it/s]

finished frames 436200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 72832/1666666 [26:32<2:02:57, 216.04it/s]

finished frames 436800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 72942/1666666 [26:33<2:02:41, 216.49it/s]

finished frames 437400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 73030/1666666 [26:33<2:05:58, 210.85it/s]

finished frames 438000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 73140/1666666 [26:34<2:03:44, 214.65it/s]

finished frames 438600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 73228/1666666 [26:34<2:03:00, 215.91it/s]

finished frames 439200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 73338/1666666 [26:35<2:02:57, 215.97it/s]

finished frames 439800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 73426/1666666 [26:35<2:03:01, 215.83it/s]

finished frames 440400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 73536/1666666 [26:35<2:02:48, 216.20it/s]

finished frames 441000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 73624/1666666 [26:36<2:03:15, 215.39it/s]

finished frames 441600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 73734/1666666 [26:36<2:03:24, 215.12it/s]

finished frames 442200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 73844/1666666 [26:37<2:03:11, 215.49it/s]

finished frames 442800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 73932/1666666 [26:37<2:03:08, 215.57it/s]

finished frames 443400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 74020/1666666 [26:38<2:06:38, 209.59it/s]

finished frames 444000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 74129/1666666 [26:38<2:06:39, 209.56it/s]

finished frames 444600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 74239/1666666 [26:39<2:07:43, 207.79it/s]

finished frames 445200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 74326/1666666 [26:39<2:04:37, 212.95it/s]

finished frames 445800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 74436/1666666 [26:40<2:03:24, 215.04it/s]

finished frames 446400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 74524/1666666 [26:40<2:03:29, 214.88it/s]

finished frames 447000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 74634/1666666 [26:41<2:03:27, 214.91it/s]

finished frames 447600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 74744/1666666 [26:41<2:02:57, 215.77it/s]

finished frames 448200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 74832/1666666 [26:42<2:03:12, 215.32it/s]

finished frames 448800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  4%|▍         | 74942/1666666 [26:42<2:02:26, 216.66it/s]

finished frames 449400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 75030/1666666 [26:43<2:05:45, 210.95it/s]

finished frames 450000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 75140/1666666 [26:43<2:03:26, 214.87it/s]

finished frames 450600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 75228/1666666 [26:43<2:03:12, 215.28it/s]

finished frames 451200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 75338/1666666 [26:44<2:02:53, 215.81it/s]

finished frames 451800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 75426/1666666 [26:44<2:02:19, 216.80it/s]

finished frames 452400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 75536/1666666 [26:45<2:01:57, 217.45it/s]

finished frames 453000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 75624/1666666 [26:45<2:02:21, 216.70it/s]

finished frames 453600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 75734/1666666 [26:46<2:02:07, 217.12it/s]

finished frames 454200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 75844/1666666 [26:46<2:01:44, 217.79it/s]

finished frames 454800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 75932/1666666 [26:47<2:03:10, 215.25it/s]

finished frames 455400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 76042/1666666 [26:47<2:05:07, 211.87it/s]

finished frames 456000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 76130/1666666 [26:48<2:02:55, 215.66it/s]

finished frames 456600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 76240/1666666 [26:48<2:02:04, 217.14it/s]

finished frames 457200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 76328/1666666 [26:49<2:02:24, 216.54it/s]

finished frames 457800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 76438/1666666 [26:49<2:02:09, 216.96it/s]

finished frames 458400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 76526/1666666 [26:50<2:05:41, 210.84it/s]

finished frames 459000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 76636/1666666 [26:50<2:03:27, 214.65it/s]

finished frames 459600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 76724/1666666 [26:50<2:04:33, 212.73it/s]

finished frames 460200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 76834/1666666 [26:51<2:04:43, 212.44it/s]

finished frames 460800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 76944/1666666 [26:51<2:04:28, 212.86it/s]

finished frames 461400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 77032/1666666 [26:52<2:07:25, 207.90it/s]

finished frames 462000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 77141/1666666 [26:52<2:04:44, 212.38it/s]

finished frames 462600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 77229/1666666 [26:53<2:04:12, 213.28it/s]

finished frames 463200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 77339/1666666 [26:53<2:05:01, 211.88it/s]

finished frames 463800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 77427/1666666 [26:54<2:05:56, 210.32it/s]

finished frames 464400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 77534/1666666 [26:54<2:06:24, 209.53it/s]

finished frames 465000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 77639/1666666 [26:55<2:07:09, 208.27it/s]

finished frames 465600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 77725/1666666 [26:55<2:06:47, 208.86it/s]

finished frames 466200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 77831/1666666 [26:56<2:06:53, 208.70it/s]

finished frames 466800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 77938/1666666 [26:56<2:06:17, 209.66it/s]

finished frames 467400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 78023/1666666 [26:57<2:08:47, 205.57it/s]

finished frames 468000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 78131/1666666 [26:57<2:06:34, 209.17it/s]

finished frames 468600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 78238/1666666 [26:58<2:06:09, 209.85it/s]

finished frames 469200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 78324/1666666 [26:58<2:06:04, 209.99it/s]

finished frames 469800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 78433/1666666 [26:59<2:05:06, 211.59it/s]

finished frames 470400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 78543/1666666 [26:59<2:04:08, 213.20it/s]

finished frames 471000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 78631/1666666 [27:00<2:03:29, 214.33it/s]

finished frames 471600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 78741/1666666 [27:00<2:03:35, 214.14it/s]

finished frames 472200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 78829/1666666 [27:00<2:14:38, 196.55it/s]

finished frames 472800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 78938/1666666 [27:01<2:04:39, 212.28it/s]

finished frames 473400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 79025/1666666 [27:01<2:09:42, 204.00it/s]

finished frames 474000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 79135/1666666 [27:02<2:04:14, 212.95it/s]

finished frames 474600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 79223/1666666 [27:02<2:03:21, 214.48it/s]

finished frames 475200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 79333/1666666 [27:03<2:03:07, 214.86it/s]

finished frames 475800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 79443/1666666 [27:03<2:02:54, 215.23it/s]

finished frames 476400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 79531/1666666 [27:04<2:02:52, 215.29it/s]

finished frames 477000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 79641/1666666 [27:04<2:02:37, 215.69it/s]

finished frames 477600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 79729/1666666 [27:05<2:02:46, 215.42it/s]

finished frames 478200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 79839/1666666 [27:05<2:02:37, 215.67it/s]

finished frames 478800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 79927/1666666 [27:06<2:03:16, 214.52it/s]

finished frames 479400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 80037/1666666 [27:06<2:05:42, 210.35it/s]

finished frames 480000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 80125/1666666 [27:07<2:03:30, 214.09it/s]

finished frames 480600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 80235/1666666 [27:07<2:03:03, 214.87it/s]

finished frames 481200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 80323/1666666 [27:07<2:02:55, 215.07it/s]

finished frames 481800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 80433/1666666 [27:08<2:03:19, 214.38it/s]

finished frames 482400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 80543/1666666 [27:09<2:04:21, 212.58it/s]

finished frames 483000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 80631/1666666 [27:09<2:04:42, 211.96it/s]

finished frames 483600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 80741/1666666 [27:09<2:04:38, 212.07it/s]

finished frames 484200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 80829/1666666 [27:10<2:04:36, 212.10it/s]

finished frames 484800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 80939/1666666 [27:10<2:04:01, 213.11it/s]

finished frames 485400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 81027/1666666 [27:11<2:07:04, 207.97it/s]

finished frames 486000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 81136/1666666 [27:11<2:05:01, 211.38it/s]

finished frames 486600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 81224/1666666 [27:12<2:18:53, 190.25it/s]

finished frames 487200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 81333/1666666 [27:12<2:06:37, 208.67it/s]

finished frames 487800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 81441/1666666 [27:13<2:08:37, 205.40it/s]

finished frames 488400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 81526/1666666 [27:13<2:06:55, 208.15it/s]

finished frames 489000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 81634/1666666 [27:14<2:05:49, 209.96it/s]

finished frames 489600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 81742/1666666 [27:14<2:05:37, 210.27it/s]

finished frames 490200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 81827/1666666 [27:15<2:06:11, 209.31it/s]

finished frames 490800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 81934/1666666 [27:15<2:05:58, 209.66it/s]

finished frames 491400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 82040/1666666 [27:16<2:08:50, 204.97it/s]

finished frames 492000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 82125/1666666 [27:16<2:06:57, 208.02it/s]

finished frames 492600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 82230/1666666 [27:17<2:06:26, 208.86it/s]

finished frames 493200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 82335/1666666 [27:17<2:06:16, 209.10it/s]

finished frames 493800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 82440/1666666 [27:18<2:06:20, 208.99it/s]

finished frames 494400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 82525/1666666 [27:18<2:05:54, 209.70it/s]

finished frames 495000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 82634/1666666 [27:19<2:05:13, 210.83it/s]

finished frames 495600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 82722/1666666 [27:19<2:05:11, 210.86it/s]

finished frames 496200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 82831/1666666 [27:20<2:05:16, 210.71it/s]

finished frames 496800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 82938/1666666 [27:20<2:05:57, 209.57it/s]

finished frames 497400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 83024/1666666 [27:20<2:08:28, 205.43it/s]

finished frames 498000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 83132/1666666 [27:21<2:05:53, 209.64it/s]

finished frames 498600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 83239/1666666 [27:21<2:05:45, 209.86it/s]

finished frames 499200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▍         | 83325/1666666 [27:22<2:05:18, 210.59it/s]

finished frames 499800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 83433/1666666 [27:22<2:05:44, 209.86it/s]

finished frames 500400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 83540/1666666 [27:23<2:06:05, 209.27it/s]

finished frames 501000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 83624/1666666 [27:23<2:13:59, 196.92it/s]

finished frames 501600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 83730/1666666 [27:24<2:12:27, 199.19it/s]

finished frames 502200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 83835/1666666 [27:24<2:07:24, 207.06it/s]

finished frames 502800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 83943/1666666 [27:25<2:05:52, 209.56it/s]

finished frames 503400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 84027/1666666 [27:25<2:09:02, 204.42it/s]

finished frames 504000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 84135/1666666 [27:26<2:06:17, 208.84it/s]

finished frames 504600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 84240/1666666 [27:26<2:06:25, 208.62it/s]

finished frames 505200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 84325/1666666 [27:27<2:06:02, 209.24it/s]

finished frames 505800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 84432/1666666 [27:27<2:05:42, 209.77it/s]

finished frames 506400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 84538/1666666 [27:28<2:06:02, 209.19it/s]

finished frames 507000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 84624/1666666 [27:28<2:05:54, 209.40it/s]

finished frames 507600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 84731/1666666 [27:29<2:05:43, 209.71it/s]

finished frames 508200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 84837/1666666 [27:29<2:06:10, 208.95it/s]

finished frames 508800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 84942/1666666 [27:30<2:06:16, 208.76it/s]

finished frames 509400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 85026/1666666 [27:30<2:09:37, 203.36it/s]

finished frames 510000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 85132/1666666 [27:31<2:06:50, 207.82it/s]

finished frames 510600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 85239/1666666 [27:31<2:06:07, 208.98it/s]

finished frames 511200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 85326/1666666 [27:32<2:04:59, 210.85it/s]

finished frames 511800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 85436/1666666 [27:32<2:03:30, 213.39it/s]

finished frames 512400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 85524/1666666 [27:33<2:02:49, 214.54it/s]

finished frames 513000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 85634/1666666 [27:33<2:02:31, 215.05it/s]

finished frames 513600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 85744/1666666 [27:34<2:02:37, 214.88it/s]

finished frames 514200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 85832/1666666 [27:34<2:02:32, 215.00it/s]

finished frames 514800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 85920/1666666 [27:34<2:13:09, 197.85it/s]

finished frames 515400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 86029/1666666 [27:35<2:06:41, 207.92it/s]

finished frames 516000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 86139/1666666 [27:35<2:02:27, 215.11it/s]

finished frames 516600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 86227/1666666 [27:36<2:02:04, 215.77it/s]

finished frames 517200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 86337/1666666 [27:36<2:02:05, 215.73it/s]

finished frames 517800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 86425/1666666 [27:37<2:01:56, 215.97it/s]

finished frames 518400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 86535/1666666 [27:37<2:01:55, 216.00it/s]

finished frames 519000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 86623/1666666 [27:38<2:02:30, 214.96it/s]

finished frames 519600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 86733/1666666 [27:38<2:02:12, 215.46it/s]

finished frames 520200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 86843/1666666 [27:39<2:02:14, 215.40it/s]

finished frames 520800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 86931/1666666 [27:39<2:02:14, 215.39it/s]

finished frames 521400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 87041/1666666 [27:40<2:04:46, 210.99it/s]

finished frames 522000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 87129/1666666 [27:40<2:03:11, 213.71it/s]

finished frames 522600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 87239/1666666 [27:41<2:02:07, 215.54it/s]

finished frames 523200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 87327/1666666 [27:41<2:01:59, 215.78it/s]

finished frames 523800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 87437/1666666 [27:41<2:02:24, 215.01it/s]

finished frames 524400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 87525/1666666 [27:42<2:02:41, 214.50it/s]

finished frames 525000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 87635/1666666 [27:42<2:03:15, 213.52it/s]

finished frames 525600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 87723/1666666 [27:43<2:02:29, 214.82it/s]

finished frames 526200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 87833/1666666 [27:43<2:02:18, 215.14it/s]

finished frames 526800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 87943/1666666 [27:44<2:02:00, 215.66it/s]

finished frames 527400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 88031/1666666 [27:44<2:05:06, 210.30it/s]

finished frames 528000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 88141/1666666 [27:45<2:02:28, 214.80it/s]

finished frames 528600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 88229/1666666 [27:45<2:02:25, 214.87it/s]

finished frames 529200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 88317/1666666 [27:46<2:02:10, 215.32it/s]

finished frames 529800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 88427/1666666 [27:46<2:05:09, 210.15it/s]

finished frames 530400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 88536/1666666 [27:47<2:06:19, 208.22it/s]

finished frames 531000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 88623/1666666 [27:47<2:03:21, 213.19it/s]

finished frames 531600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 88733/1666666 [27:48<2:06:08, 208.49it/s]

finished frames 532200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 88841/1666666 [27:48<2:06:06, 208.52it/s]

finished frames 532800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 88928/1666666 [27:49<2:05:14, 209.95it/s]

finished frames 533400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 89035/1666666 [27:49<2:07:56, 205.52it/s]

finished frames 534000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 89122/1666666 [27:49<2:05:39, 209.23it/s]

finished frames 534600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 89230/1666666 [27:50<2:04:53, 210.50it/s]

finished frames 535200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 89339/1666666 [27:50<2:04:36, 210.97it/s]

finished frames 535800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 89424/1666666 [27:51<2:07:54, 205.52it/s]

finished frames 536400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 89529/1666666 [27:51<2:09:03, 203.68it/s]

finished frames 537000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 89637/1666666 [27:52<2:05:53, 208.78it/s]

finished frames 537600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 89724/1666666 [27:52<2:04:17, 211.45it/s]

finished frames 538200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 89834/1666666 [27:53<2:04:02, 211.88it/s]

finished frames 538800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 89944/1666666 [27:53<2:03:11, 213.32it/s]

finished frames 539400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 90030/1666666 [27:54<2:09:42, 202.57it/s]

finished frames 540000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 90139/1666666 [27:54<2:04:29, 211.07it/s]

finished frames 540600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 90227/1666666 [27:55<2:04:15, 211.44it/s]

finished frames 541200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 90337/1666666 [27:55<2:03:15, 213.13it/s]

finished frames 541800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 90424/1666666 [27:56<2:08:22, 204.63it/s]

finished frames 542400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 90534/1666666 [27:56<2:04:17, 211.36it/s]

finished frames 543000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 90644/1666666 [27:57<2:03:30, 212.68it/s]

finished frames 543600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 90732/1666666 [27:57<2:07:38, 205.79it/s]

finished frames 544200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 90841/1666666 [27:58<2:09:26, 202.91it/s]

finished frames 544800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 90928/1666666 [27:58<2:04:46, 210.49it/s]

finished frames 545400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 91038/1666666 [27:59<2:05:58, 208.46it/s]

finished frames 546000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 91125/1666666 [27:59<2:03:56, 211.86it/s]

finished frames 546600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 91235/1666666 [28:00<2:03:23, 212.79it/s]

finished frames 547200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 91323/1666666 [28:00<2:03:15, 213.01it/s]

finished frames 547800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 91433/1666666 [28:01<2:03:23, 212.76it/s]

finished frames 548400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 91543/1666666 [28:01<2:02:54, 213.60it/s]

finished frames 549000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  5%|▌         | 91631/1666666 [28:01<2:03:39, 212.29it/s]

finished frames 549600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 91741/1666666 [28:02<2:03:07, 213.19it/s]

finished frames 550200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 91829/1666666 [28:02<2:03:09, 213.13it/s]

finished frames 550800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 91939/1666666 [28:03<2:02:58, 213.41it/s]

finished frames 551400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 92027/1666666 [28:03<2:05:32, 209.06it/s]

finished frames 552000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 92136/1666666 [28:04<2:03:12, 212.98it/s]

finished frames 552600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 92224/1666666 [28:04<2:03:21, 212.72it/s]

finished frames 553200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 92334/1666666 [28:05<2:02:51, 213.56it/s]

finished frames 553800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 92422/1666666 [28:05<2:03:15, 212.88it/s]

finished frames 554400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 92532/1666666 [28:06<2:03:01, 213.24it/s]

finished frames 555000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 92642/1666666 [28:06<2:02:54, 213.44it/s]

finished frames 555600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 92730/1666666 [28:07<2:03:18, 212.74it/s]

finished frames 556200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 92840/1666666 [28:07<2:03:07, 213.04it/s]

finished frames 556800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 92928/1666666 [28:08<2:03:13, 212.84it/s]

finished frames 557400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 93038/1666666 [28:08<2:06:00, 208.14it/s]

finished frames 558000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 93125/1666666 [28:09<2:07:53, 205.07it/s]

finished frames 558600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 93234/1666666 [28:09<2:07:39, 205.42it/s]

finished frames 559200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 93343/1666666 [28:10<2:03:22, 212.55it/s]

finished frames 559800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 93431/1666666 [28:10<2:03:02, 213.11it/s]

finished frames 560400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 93541/1666666 [28:11<2:02:47, 213.52it/s]

finished frames 561000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 93629/1666666 [28:11<2:02:47, 213.51it/s]

finished frames 561600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 93739/1666666 [28:11<2:02:37, 213.78it/s]

finished frames 562200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 93827/1666666 [28:12<2:02:36, 213.79it/s]

finished frames 562800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 93937/1666666 [28:12<2:02:43, 213.60it/s]

finished frames 563400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 94025/1666666 [28:13<2:05:10, 209.39it/s]

finished frames 564000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 94134/1666666 [28:13<2:02:52, 213.29it/s]

finished frames 564600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 94244/1666666 [28:14<2:02:35, 213.78it/s]

finished frames 565200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 94332/1666666 [28:14<2:02:53, 213.25it/s]

finished frames 565800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 94442/1666666 [28:15<2:01:49, 215.09it/s]

finished frames 566400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 94530/1666666 [28:15<2:02:20, 214.18it/s]

finished frames 567000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 94640/1666666 [28:16<2:02:18, 214.22it/s]

finished frames 567600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 94728/1666666 [28:16<2:02:43, 213.48it/s]

finished frames 568200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 94838/1666666 [28:17<2:02:37, 213.64it/s]

finished frames 568800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 94926/1666666 [28:17<2:02:46, 213.36it/s]

finished frames 569400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 95036/1666666 [28:18<2:05:21, 208.94it/s]

finished frames 570000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 95123/1666666 [28:18<2:03:25, 212.21it/s]

finished frames 570600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 95233/1666666 [28:18<2:02:51, 213.18it/s]

finished frames 571200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 95343/1666666 [28:19<2:02:54, 213.07it/s]

finished frames 571800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 95431/1666666 [28:19<2:13:51, 195.62it/s]

finished frames 572400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 95540/1666666 [28:20<2:04:28, 210.36it/s]

finished frames 573000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 95628/1666666 [28:20<2:06:45, 206.56it/s]

finished frames 573600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 95737/1666666 [28:21<2:03:24, 212.16it/s]

finished frames 574200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 95825/1666666 [28:21<2:02:45, 213.26it/s]

finished frames 574800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 95935/1666666 [28:22<2:02:43, 213.32it/s]

finished frames 575400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 96023/1666666 [28:22<2:05:28, 208.63it/s]

finished frames 576000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 96133/1666666 [28:23<2:03:11, 212.48it/s]

finished frames 576600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 96243/1666666 [28:23<2:02:40, 213.37it/s]

finished frames 577200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 96331/1666666 [28:24<2:02:54, 212.95it/s]

finished frames 577800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 96441/1666666 [28:24<2:02:42, 213.27it/s]

finished frames 578400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 96529/1666666 [28:25<2:02:32, 213.55it/s]

finished frames 579000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 96639/1666666 [28:25<2:02:26, 213.72it/s]

finished frames 579600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 96727/1666666 [28:26<2:02:38, 213.34it/s]

finished frames 580200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 96837/1666666 [28:26<2:02:25, 213.72it/s]

finished frames 580800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 96925/1666666 [28:27<2:02:35, 213.41it/s]

finished frames 581400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 97035/1666666 [28:27<2:05:15, 208.85it/s]

finished frames 582000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 97122/1666666 [28:27<2:03:47, 211.31it/s]

finished frames 582600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 97232/1666666 [28:28<2:03:17, 212.16it/s]

finished frames 583200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 97342/1666666 [28:28<2:02:39, 213.24it/s]

finished frames 583800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 97430/1666666 [28:29<2:02:26, 213.61it/s]

finished frames 584400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 97540/1666666 [28:29<2:02:45, 213.02it/s]

finished frames 585000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 97628/1666666 [28:30<2:02:23, 213.67it/s]

finished frames 585600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 97738/1666666 [28:30<2:02:14, 213.91it/s]

finished frames 586200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 97826/1666666 [28:31<2:14:20, 194.64it/s]

finished frames 586800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 97934/1666666 [28:31<2:17:14, 190.50it/s]

finished frames 587400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 98020/1666666 [28:32<2:10:33, 200.25it/s]

finished frames 588000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 98129/1666666 [28:32<2:04:53, 209.33it/s]

finished frames 588600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 98238/1666666 [28:33<2:03:17, 212.02it/s]

finished frames 589200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 98326/1666666 [28:33<2:03:33, 211.55it/s]

finished frames 589800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 98436/1666666 [28:34<2:03:22, 211.85it/s]

finished frames 590400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 98524/1666666 [28:34<2:03:22, 211.85it/s]

finished frames 591000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 98634/1666666 [28:35<2:03:15, 212.01it/s]

finished frames 591600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 98722/1666666 [28:35<2:03:12, 212.10it/s]

finished frames 592200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 98832/1666666 [28:36<2:03:09, 212.18it/s]

finished frames 592800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 98942/1666666 [28:36<2:03:16, 211.94it/s]

finished frames 593400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 99030/1666666 [28:37<2:05:48, 207.67it/s]

finished frames 594000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 99139/1666666 [28:37<2:03:43, 211.16it/s]

finished frames 594600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 99227/1666666 [28:37<2:03:28, 211.57it/s]

finished frames 595200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 99337/1666666 [28:38<2:03:03, 212.26it/s]

finished frames 595800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 99425/1666666 [28:38<2:03:15, 211.93it/s]

finished frames 596400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 99535/1666666 [28:39<2:03:06, 212.16it/s]

finished frames 597000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 99623/1666666 [28:39<2:03:02, 212.26it/s]

finished frames 597600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 99733/1666666 [28:40<2:02:58, 212.36it/s]

finished frames 598200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 99843/1666666 [28:40<2:02:56, 212.41it/s]

finished frames 598800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 99931/1666666 [28:41<2:02:53, 212.49it/s]

finished frames 599400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 100041/1666666 [28:41<2:06:09, 206.96it/s]

finished frames 600000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 100128/1666666 [28:42<2:04:08, 210.32it/s]

finished frames 600600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 100237/1666666 [28:42<2:09:00, 202.37it/s]

finished frames 601200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 100323/1666666 [28:43<2:12:58, 196.31it/s]

finished frames 601800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 100432/1666666 [28:43<2:05:02, 208.76it/s]

finished frames 602400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 100541/1666666 [28:44<2:03:13, 211.82it/s]

finished frames 603000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 100629/1666666 [28:44<2:03:28, 211.37it/s]

finished frames 603600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 100739/1666666 [28:45<2:03:04, 212.04it/s]

finished frames 604200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 100827/1666666 [28:45<2:03:37, 211.10it/s]

finished frames 604800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 100937/1666666 [28:46<2:03:09, 211.90it/s]

finished frames 605400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 101025/1666666 [28:46<2:05:44, 207.52it/s]

finished frames 606000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 101134/1666666 [28:47<2:04:17, 209.93it/s]

finished frames 606600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 101243/1666666 [28:47<2:03:24, 211.41it/s]

finished frames 607200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 101331/1666666 [28:48<2:03:34, 211.10it/s]

finished frames 607800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 101441/1666666 [28:48<2:02:54, 212.26it/s]

finished frames 608400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 101529/1666666 [28:48<2:03:11, 211.75it/s]

finished frames 609000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 101639/1666666 [28:49<2:02:56, 212.16it/s]

finished frames 609600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 101727/1666666 [28:49<2:03:28, 211.25it/s]

finished frames 610200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 101837/1666666 [28:50<2:03:37, 210.96it/s]

finished frames 610800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 101924/1666666 [28:50<2:05:29, 207.82it/s]

finished frames 611400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 102031/1666666 [28:51<2:06:54, 205.48it/s]

finished frames 612000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 102140/1666666 [28:51<2:02:12, 213.37it/s]

finished frames 612600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 102228/1666666 [28:52<2:00:55, 215.63it/s]

finished frames 613200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 102338/1666666 [28:52<2:00:15, 216.79it/s]

finished frames 613800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 102426/1666666 [28:53<2:00:47, 215.83it/s]

finished frames 614400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 102536/1666666 [28:53<2:01:13, 215.03it/s]

finished frames 615000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 102624/1666666 [28:54<2:04:45, 208.93it/s]

finished frames 615600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 102734/1666666 [28:54<2:05:12, 208.17it/s]

finished frames 616200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 102844/1666666 [28:55<2:01:06, 215.21it/s]

finished frames 616800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 102932/1666666 [28:55<2:01:08, 215.14it/s]

finished frames 617400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 103020/1666666 [28:56<2:04:46, 208.86it/s]

finished frames 618000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 103130/1666666 [28:56<2:02:00, 213.58it/s]

finished frames 618600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 103240/1666666 [28:57<2:01:07, 215.12it/s]

finished frames 619200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 103328/1666666 [28:57<2:01:12, 214.97it/s]

finished frames 619800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 103438/1666666 [28:57<2:00:50, 215.60it/s]

finished frames 620400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 103526/1666666 [28:58<2:00:50, 215.59it/s]

finished frames 621000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 103636/1666666 [28:58<2:00:51, 215.55it/s]

finished frames 621600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 103724/1666666 [28:59<2:01:25, 214.52it/s]

finished frames 622200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 103834/1666666 [28:59<2:00:16, 216.56it/s]

finished frames 622800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 103944/1666666 [29:00<2:00:08, 216.78it/s]

finished frames 623400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 104032/1666666 [29:00<2:03:41, 210.57it/s]

finished frames 624000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▌         | 104142/1666666 [29:01<2:01:35, 214.19it/s]

finished frames 624600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▋         | 104230/1666666 [29:01<2:01:37, 214.10it/s]

finished frames 625200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▋         | 104340/1666666 [29:02<2:01:17, 214.67it/s]

finished frames 625800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▋         | 104428/1666666 [29:02<2:01:01, 215.15it/s]

finished frames 626400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▋         | 104538/1666666 [29:03<2:01:02, 215.11it/s]

finished frames 627000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▋         | 104626/1666666 [29:03<2:01:04, 215.03it/s]

finished frames 627600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▋         | 104736/1666666 [29:04<2:00:42, 215.67it/s]

finished frames 628200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▋         | 104824/1666666 [29:04<2:01:04, 215.01it/s]

finished frames 628800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▋         | 104934/1666666 [29:04<2:01:06, 214.93it/s]

finished frames 629400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▋         | 105021/1666666 [29:05<2:12:41, 196.15it/s]

finished frames 630000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▋         | 105130/1666666 [29:06<2:09:59, 200.22it/s]

finished frames 630600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▋         | 105240/1666666 [29:06<2:01:52, 213.52it/s]

finished frames 631200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▋         | 105328/1666666 [29:06<2:01:05, 214.89it/s]

finished frames 631800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▋         | 105438/1666666 [29:07<2:00:28, 215.97it/s]

finished frames 632400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▋         | 105526/1666666 [29:07<2:00:05, 216.65it/s]

finished frames 633000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▋         | 105636/1666666 [29:08<2:00:22, 216.14it/s]

finished frames 633600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▋         | 105724/1666666 [29:08<2:00:15, 216.33it/s]

finished frames 634200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▋         | 105834/1666666 [29:09<2:00:12, 216.42it/s]

finished frames 634800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▋         | 105944/1666666 [29:09<2:00:01, 216.74it/s]

finished frames 635400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▋         | 106032/1666666 [29:10<2:02:36, 212.13it/s]

finished frames 636000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▋         | 106142/1666666 [29:10<2:00:21, 216.09it/s]

finished frames 636600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▋         | 106230/1666666 [29:11<2:00:26, 215.95it/s]

finished frames 637200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▋         | 106340/1666666 [29:11<2:00:00, 216.70it/s]

finished frames 637800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▋         | 106428/1666666 [29:12<2:00:06, 216.51it/s]

finished frames 638400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▋         | 106538/1666666 [29:12<1:59:59, 216.69it/s]

finished frames 639000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▋         | 106626/1666666 [29:12<1:59:54, 216.85it/s]

finished frames 639600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▋         | 106736/1666666 [29:13<1:59:37, 217.35it/s]

finished frames 640200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▋         | 106824/1666666 [29:13<1:59:54, 216.80it/s]

finished frames 640800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▋         | 106934/1666666 [29:14<1:59:59, 216.64it/s]

finished frames 641400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▋         | 107022/1666666 [29:14<2:03:37, 210.27it/s]

finished frames 642000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▋         | 107132/1666666 [29:15<2:00:39, 215.42it/s]

finished frames 642600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▋         | 107242/1666666 [29:15<1:59:40, 217.17it/s]

finished frames 643200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▋         | 107330/1666666 [29:16<1:59:54, 216.74it/s]

finished frames 643800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▋         | 107440/1666666 [29:16<2:03:37, 210.20it/s]

finished frames 644400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▋         | 107528/1666666 [29:17<2:06:04, 206.12it/s]

finished frames 645000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▋         | 107638/1666666 [29:17<2:01:15, 214.28it/s]

finished frames 645600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▋         | 107726/1666666 [29:18<2:00:32, 215.54it/s]

finished frames 646200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▋         | 107836/1666666 [29:18<1:59:43, 217.00it/s]

finished frames 646800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▋         | 107924/1666666 [29:19<2:00:01, 216.44it/s]

finished frames 647400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▋         | 108034/1666666 [29:19<2:02:04, 212.80it/s]

finished frames 648000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▋         | 108144/1666666 [29:20<2:00:13, 216.05it/s]

finished frames 648600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  6%|▋         | 108232/1666666 [29:20<1:59:53, 216.64it/s]

finished frames 649200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 108342/1666666 [29:20<1:59:46, 216.85it/s]

finished frames 649800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 108430/1666666 [29:21<1:59:45, 216.86it/s]

finished frames 650400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 108540/1666666 [29:21<2:01:25, 213.87it/s]

finished frames 651000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 108628/1666666 [29:22<2:01:41, 213.38it/s]

finished frames 651600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 108738/1666666 [29:22<2:02:07, 212.61it/s]

finished frames 652200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 108826/1666666 [29:23<2:01:56, 212.93it/s]

finished frames 652800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 108936/1666666 [29:23<2:01:26, 213.78it/s]

finished frames 653400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 109024/1666666 [29:24<2:04:18, 208.85it/s]

finished frames 654000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 109134/1666666 [29:24<2:02:05, 212.61it/s]

finished frames 654600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 109222/1666666 [29:25<2:01:56, 212.86it/s]

finished frames 655200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 109332/1666666 [29:25<2:01:48, 213.08it/s]

finished frames 655800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 109442/1666666 [29:26<2:01:50, 213.00it/s]

finished frames 656400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 109530/1666666 [29:26<2:02:23, 212.03it/s]

finished frames 657000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 109640/1666666 [29:27<2:01:47, 213.07it/s]

finished frames 657600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 109728/1666666 [29:27<2:01:17, 213.93it/s]

finished frames 658200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 109838/1666666 [29:28<2:05:20, 207.00it/s]

finished frames 658800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 109924/1666666 [29:28<2:07:44, 203.11it/s]

finished frames 659400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 110033/1666666 [29:29<2:04:49, 207.84it/s]

finished frames 660000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 110142/1666666 [29:29<2:01:42, 213.16it/s]

finished frames 660600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 110230/1666666 [29:29<2:01:43, 213.11it/s]

finished frames 661200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 110340/1666666 [29:30<2:01:11, 214.02it/s]

finished frames 661800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 110428/1666666 [29:30<2:01:24, 213.65it/s]

finished frames 662400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 110538/1666666 [29:31<2:01:57, 212.66it/s]

finished frames 663000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 110626/1666666 [29:31<2:02:12, 212.23it/s]

finished frames 663600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 110736/1666666 [29:32<2:02:06, 212.37it/s]

finished frames 664200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 110824/1666666 [29:32<2:02:07, 212.32it/s]

finished frames 664800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 110934/1666666 [29:33<2:02:23, 211.84it/s]

finished frames 665400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 111022/1666666 [29:33<2:05:12, 207.07it/s]

finished frames 666000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 111132/1666666 [29:34<2:01:47, 212.88it/s]

finished frames 666600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 111242/1666666 [29:34<2:00:26, 215.25it/s]

finished frames 667200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 111330/1666666 [29:35<2:00:43, 214.73it/s]

finished frames 667800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 111440/1666666 [29:35<2:00:26, 215.21it/s]

finished frames 668400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 111528/1666666 [29:36<2:00:29, 215.12it/s]

finished frames 669000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 111638/1666666 [29:36<2:00:21, 215.33it/s]

finished frames 669600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 111726/1666666 [29:36<2:00:23, 215.25it/s]

finished frames 670200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 111836/1666666 [29:37<2:00:18, 215.40it/s]

finished frames 670800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 111924/1666666 [29:37<2:00:29, 215.06it/s]

finished frames 671400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 112034/1666666 [29:38<2:03:41, 209.46it/s]

finished frames 672000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 112143/1666666 [29:38<2:00:45, 214.55it/s]

finished frames 672600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 112231/1666666 [29:39<2:01:00, 214.09it/s]

finished frames 673200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 112341/1666666 [29:39<2:03:54, 209.08it/s]

finished frames 673800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 112429/1666666 [29:40<2:01:38, 212.96it/s]

finished frames 674400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 112539/1666666 [29:40<2:00:42, 214.60it/s]

finished frames 675000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 112627/1666666 [29:41<2:00:14, 215.39it/s]

finished frames 675600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 112737/1666666 [29:41<1:59:43, 216.32it/s]

finished frames 676200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 112825/1666666 [29:42<2:00:29, 214.93it/s]

finished frames 676800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 112935/1666666 [29:42<2:00:51, 214.26it/s]

finished frames 677400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 113023/1666666 [29:43<2:03:27, 209.73it/s]

finished frames 678000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 113133/1666666 [29:43<2:00:56, 214.08it/s]

finished frames 678600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 113243/1666666 [29:44<2:00:54, 214.12it/s]

finished frames 679200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 113331/1666666 [29:44<2:00:59, 213.99it/s]

finished frames 679800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 113441/1666666 [29:45<2:00:18, 215.16it/s]

finished frames 680400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 113529/1666666 [29:45<2:00:26, 214.91it/s]

finished frames 681000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 113639/1666666 [29:45<2:00:42, 214.45it/s]

finished frames 681600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 113727/1666666 [29:46<2:00:39, 214.51it/s]

finished frames 682200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 113837/1666666 [29:46<2:00:18, 215.11it/s]

finished frames 682800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 113923/1666666 [29:47<2:03:31, 209.51it/s]

finished frames 683400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 114033/1666666 [29:47<2:03:38, 209.30it/s]

finished frames 684000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 114143/1666666 [29:48<2:01:16, 213.35it/s]

finished frames 684600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 114231/1666666 [29:48<2:01:15, 213.39it/s]

finished frames 685200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 114341/1666666 [29:49<2:00:14, 215.17it/s]

finished frames 685800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 114429/1666666 [29:49<2:00:28, 214.75it/s]

finished frames 686400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 114539/1666666 [29:50<2:00:10, 215.27it/s]

finished frames 687000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 114627/1666666 [29:50<2:04:06, 208.43it/s]

finished frames 687600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 114737/1666666 [29:51<2:04:30, 207.73it/s]

finished frames 688200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 114825/1666666 [29:51<2:01:40, 212.58it/s]

finished frames 688800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 114935/1666666 [29:52<2:00:47, 214.12it/s]

finished frames 689400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 115023/1666666 [29:52<2:03:05, 210.10it/s]

finished frames 690000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 115133/1666666 [29:53<2:00:26, 214.70it/s]

finished frames 690600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 115243/1666666 [29:53<2:00:19, 214.90it/s]

finished frames 691200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 115331/1666666 [29:53<2:00:13, 215.07it/s]

finished frames 691800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 115441/1666666 [29:54<2:04:11, 208.19it/s]

finished frames 692400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 115528/1666666 [29:54<2:01:17, 213.14it/s]

finished frames 693000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 115638/1666666 [29:55<2:00:49, 213.96it/s]

finished frames 693600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 115726/1666666 [29:55<2:00:45, 214.06it/s]

finished frames 694200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 115836/1666666 [29:56<2:05:06, 206.59it/s]

finished frames 694800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 115923/1666666 [29:56<2:01:33, 212.61it/s]

finished frames 695400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 116033/1666666 [29:57<2:03:06, 209.91it/s]

finished frames 696000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 116143/1666666 [29:57<2:00:38, 214.20it/s]

finished frames 696600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 116231/1666666 [29:58<2:00:24, 214.61it/s]

finished frames 697200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 116341/1666666 [29:58<2:00:15, 214.85it/s]

finished frames 697800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 116429/1666666 [29:59<2:00:08, 215.06it/s]

finished frames 698400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 116539/1666666 [29:59<2:00:08, 215.03it/s]

finished frames 699000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 116627/1666666 [30:00<2:00:25, 214.52it/s]

finished frames 699600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 116737/1666666 [30:00<2:00:12, 214.90it/s]

finished frames 700200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 116825/1666666 [30:00<2:00:50, 213.75it/s]

finished frames 700800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 116935/1666666 [30:01<2:00:55, 213.61it/s]

finished frames 701400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 117022/1666666 [30:01<2:03:43, 208.76it/s]

finished frames 702000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 117132/1666666 [30:02<2:07:33, 202.46it/s]

finished frames 702600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 117242/1666666 [30:02<2:01:00, 213.41it/s]

finished frames 703200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 117330/1666666 [30:03<2:00:47, 213.78it/s]

finished frames 703800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 117440/1666666 [30:03<2:00:11, 214.83it/s]

finished frames 704400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 117528/1666666 [30:04<1:59:41, 215.70it/s]

finished frames 705000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 117638/1666666 [30:04<1:59:32, 215.96it/s]

finished frames 705600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 117726/1666666 [30:05<1:59:44, 215.58it/s]

finished frames 706200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 117836/1666666 [30:05<1:59:55, 215.24it/s]

finished frames 706800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 117924/1666666 [30:06<1:59:50, 215.38it/s]

finished frames 707400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 118034/1666666 [30:06<2:02:14, 211.14it/s]

finished frames 708000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 118144/1666666 [30:07<1:59:57, 215.15it/s]

finished frames 708600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 118232/1666666 [30:07<1:59:46, 215.48it/s]

finished frames 709200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 118342/1666666 [30:08<1:59:09, 216.56it/s]

finished frames 709800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 118430/1666666 [30:08<2:00:26, 214.24it/s]

finished frames 710400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 118540/1666666 [30:09<1:59:53, 215.22it/s]

finished frames 711000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 118628/1666666 [30:09<1:59:17, 216.27it/s]

finished frames 711600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 118738/1666666 [30:09<1:59:34, 215.75it/s]

finished frames 712200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 118826/1666666 [30:10<1:59:36, 215.68it/s]

finished frames 712800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 118936/1666666 [30:10<1:59:45, 215.40it/s]

finished frames 713400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 119024/1666666 [30:11<2:02:56, 209.80it/s]

finished frames 714000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 119134/1666666 [30:11<1:59:55, 215.06it/s]

finished frames 714600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 119244/1666666 [30:12<1:59:38, 215.55it/s]

finished frames 715200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 119332/1666666 [30:12<1:59:39, 215.52it/s]

finished frames 715800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 119442/1666666 [30:13<2:02:08, 211.11it/s]

finished frames 716400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 119530/1666666 [30:13<2:03:41, 208.48it/s]

finished frames 717000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 119640/1666666 [30:14<2:00:12, 214.49it/s]

finished frames 717600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 119728/1666666 [30:14<1:59:51, 215.12it/s]

finished frames 718200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 119838/1666666 [30:15<1:59:39, 215.46it/s]

finished frames 718800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 119926/1666666 [30:15<1:59:30, 215.70it/s]

finished frames 719400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 120036/1666666 [30:16<2:02:29, 210.45it/s]

finished frames 720000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 120124/1666666 [30:16<2:01:43, 211.75it/s]

finished frames 720600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 120234/1666666 [30:17<2:01:17, 212.50it/s]

finished frames 721200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 120322/1666666 [30:17<2:01:15, 212.54it/s]

finished frames 721800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 120432/1666666 [30:17<2:01:16, 212.49it/s]

finished frames 722400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 120542/1666666 [30:18<2:01:26, 212.20it/s]

finished frames 723000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 120630/1666666 [30:18<2:01:22, 212.29it/s]

finished frames 723600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 120740/1666666 [30:19<2:01:16, 212.45it/s]

finished frames 724200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 120828/1666666 [30:19<2:01:08, 212.67it/s]

finished frames 724800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 120938/1666666 [30:20<2:00:56, 213.02it/s]

finished frames 725400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 121026/1666666 [30:20<2:03:47, 208.09it/s]

finished frames 726000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 121135/1666666 [30:21<2:01:36, 211.83it/s]

finished frames 726600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 121223/1666666 [30:21<2:01:14, 212.45it/s]

finished frames 727200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 121333/1666666 [30:22<2:01:14, 212.44it/s]

finished frames 727800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 121443/1666666 [30:22<2:01:20, 212.25it/s]

finished frames 728400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 121531/1666666 [30:23<2:01:06, 212.65it/s]

finished frames 729000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 121641/1666666 [30:23<2:01:08, 212.56it/s]

finished frames 729600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 121729/1666666 [30:24<2:01:12, 212.43it/s]

finished frames 730200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 121839/1666666 [30:24<2:01:19, 212.23it/s]

finished frames 730800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 121927/1666666 [30:25<2:04:55, 206.08it/s]

finished frames 731400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 122036/1666666 [30:25<2:04:18, 207.10it/s]

finished frames 732000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 122123/1666666 [30:25<2:02:23, 210.33it/s]

finished frames 732600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 122233/1666666 [30:26<2:01:27, 211.93it/s]

finished frames 733200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 122343/1666666 [30:27<2:00:51, 212.96it/s]

finished frames 733800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 122431/1666666 [30:27<2:01:05, 212.54it/s]

finished frames 734400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 122541/1666666 [30:27<2:01:11, 212.37it/s]

finished frames 735000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 122628/1666666 [30:28<2:02:42, 209.71it/s]

finished frames 735600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 122733/1666666 [30:28<2:03:01, 209.15it/s]

finished frames 736200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 122838/1666666 [30:29<2:03:27, 208.42it/s]

finished frames 736800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 122924/1666666 [30:29<2:02:28, 210.08it/s]

finished frames 737400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 123030/1666666 [30:30<2:05:14, 205.42it/s]

finished frames 738000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 123136/1666666 [30:30<2:03:10, 208.85it/s]

finished frames 738600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 123243/1666666 [30:31<2:02:46, 209.51it/s]

finished frames 739200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 123330/1666666 [30:31<2:02:37, 209.76it/s]

finished frames 739800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 123436/1666666 [30:32<2:03:08, 208.87it/s]

finished frames 740400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 123541/1666666 [30:32<2:03:01, 209.04it/s]

finished frames 741000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 123627/1666666 [30:33<2:02:38, 209.68it/s]

finished frames 741600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 123733/1666666 [30:33<2:02:42, 209.56it/s]

finished frames 742200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 123838/1666666 [30:34<2:03:05, 208.90it/s]

finished frames 742800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 123922/1666666 [30:34<2:03:19, 208.48it/s]

finished frames 743400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 124028/1666666 [30:35<2:06:05, 203.90it/s]

finished frames 744000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 124134/1666666 [30:35<2:08:57, 199.37it/s]

finished frames 744600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 124239/1666666 [30:36<2:04:17, 206.82it/s]

finished frames 745200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 124324/1666666 [30:36<2:03:23, 208.31it/s]

finished frames 745800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 124430/1666666 [30:37<2:03:07, 208.77it/s]

finished frames 746400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 124537/1666666 [30:37<2:02:50, 209.23it/s]

finished frames 747000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 124642/1666666 [30:38<2:03:22, 208.32it/s]

finished frames 747600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 124726/1666666 [30:38<2:03:19, 208.39it/s]

finished frames 748200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 124831/1666666 [30:38<2:03:08, 208.69it/s]

finished frames 748800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  7%|▋         | 124936/1666666 [30:39<2:03:15, 208.46it/s]

finished frames 749400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 125021/1666666 [30:39<2:07:05, 202.16it/s]

finished frames 750000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 125128/1666666 [30:40<2:03:01, 208.84it/s]

finished frames 750600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 125236/1666666 [30:40<2:02:05, 210.43it/s]

finished frames 751200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 125324/1666666 [30:41<2:01:31, 211.37it/s]

finished frames 751800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 125434/1666666 [30:41<2:02:07, 210.34it/s]

finished frames 752400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 125522/1666666 [30:42<2:01:43, 211.00it/s]

finished frames 753000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 125632/1666666 [30:42<2:01:52, 210.73it/s]

finished frames 753600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 125742/1666666 [30:43<2:01:25, 211.50it/s]

finished frames 754200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 125830/1666666 [30:43<2:01:50, 210.78it/s]

finished frames 754800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 125939/1666666 [30:44<2:01:43, 210.96it/s]

finished frames 755400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 126024/1666666 [30:44<2:04:35, 206.08it/s]

finished frames 756000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 126132/1666666 [30:45<2:01:58, 210.49it/s]

finished frames 756600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 126241/1666666 [30:45<2:02:07, 210.23it/s]

finished frames 757200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 126327/1666666 [30:46<2:02:27, 209.63it/s]

finished frames 757800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 126413/1666666 [30:46<2:02:08, 210.18it/s]

finished frames 758400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 126522/1666666 [30:47<2:07:31, 201.29it/s]

finished frames 759000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 126630/1666666 [30:47<2:02:49, 208.97it/s]

finished frames 759600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 126739/1666666 [30:48<2:02:03, 210.27it/s]

finished frames 760200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 126827/1666666 [30:48<2:01:51, 210.59it/s]

finished frames 760800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 126937/1666666 [30:49<2:01:06, 211.90it/s]

finished frames 761400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 127025/1666666 [30:49<2:03:59, 206.95it/s]

finished frames 762000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 127133/1666666 [30:50<2:02:22, 209.68it/s]

finished frames 762600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 127242/1666666 [30:50<2:01:37, 210.97it/s]

finished frames 763200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 127330/1666666 [30:50<2:01:57, 210.35it/s]

finished frames 763800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 127439/1666666 [30:51<2:01:32, 211.06it/s]

finished frames 764400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 127527/1666666 [30:51<2:01:18, 211.46it/s]

finished frames 765000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 127637/1666666 [30:52<2:01:30, 211.10it/s]

finished frames 765600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 127725/1666666 [30:52<2:01:41, 210.78it/s]

finished frames 766200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 127835/1666666 [30:53<2:01:36, 210.89it/s]

finished frames 766800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 127923/1666666 [30:53<2:02:03, 210.12it/s]

finished frames 767400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 128032/1666666 [30:54<2:04:16, 206.34it/s]

finished frames 768000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 128141/1666666 [30:54<2:01:55, 210.32it/s]

finished frames 768600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 128229/1666666 [30:55<2:01:40, 210.73it/s]

finished frames 769200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 128339/1666666 [30:55<2:01:23, 211.22it/s]

finished frames 769800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 128426/1666666 [30:56<2:01:45, 210.56it/s]

finished frames 770400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 128536/1666666 [30:56<2:01:13, 211.48it/s]

finished frames 771000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 128624/1666666 [30:57<2:01:41, 210.66it/s]

finished frames 771600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 128734/1666666 [30:57<2:01:40, 210.66it/s]

finished frames 772200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 128843/1666666 [30:58<2:07:06, 201.65it/s]

finished frames 772800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 128927/1666666 [30:58<2:11:13, 195.31it/s]

finished frames 773400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 129033/1666666 [30:59<2:06:13, 203.02it/s]

finished frames 774000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 129141/1666666 [30:59<2:02:09, 209.79it/s]

finished frames 774600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 129228/1666666 [31:00<2:01:56, 210.15it/s]

finished frames 775200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 129338/1666666 [31:00<2:01:17, 211.26it/s]

finished frames 775800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 129426/1666666 [31:01<2:00:56, 211.85it/s]

finished frames 776400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 129536/1666666 [31:01<2:00:52, 211.95it/s]

finished frames 777000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 129624/1666666 [31:01<2:01:19, 211.15it/s]

finished frames 777600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 129734/1666666 [31:02<2:01:08, 211.45it/s]

finished frames 778200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 129822/1666666 [31:02<2:01:41, 210.49it/s]

finished frames 778800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 129932/1666666 [31:03<2:00:52, 211.89it/s]

finished frames 779400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 130041/1666666 [31:03<2:03:56, 206.64it/s]

finished frames 780000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 130127/1666666 [31:04<2:02:22, 209.26it/s]

finished frames 780600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 130236/1666666 [31:04<2:01:31, 210.73it/s]

finished frames 781200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 130324/1666666 [31:05<2:01:19, 211.04it/s]

finished frames 781800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 130434/1666666 [31:05<2:01:12, 211.25it/s]

finished frames 782400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 130543/1666666 [31:06<2:01:23, 210.92it/s]

finished frames 783000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 130631/1666666 [31:06<2:01:07, 211.36it/s]

finished frames 783600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 130741/1666666 [31:07<2:00:47, 211.93it/s]

finished frames 784200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 130829/1666666 [31:07<2:01:45, 210.22it/s]

finished frames 784800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 130938/1666666 [31:08<2:02:30, 208.92it/s]

finished frames 785400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 131023/1666666 [31:08<2:04:36, 205.39it/s]

finished frames 786000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 131130/1666666 [31:09<2:02:17, 209.27it/s]

finished frames 786600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 131239/1666666 [31:09<2:03:37, 207.01it/s]

finished frames 787200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 131326/1666666 [31:10<2:03:28, 207.23it/s]

finished frames 787800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 131436/1666666 [31:10<1:59:13, 214.62it/s]

finished frames 788400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 131524/1666666 [31:11<1:58:39, 215.63it/s]

finished frames 789000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 131634/1666666 [31:11<1:58:37, 215.66it/s]

finished frames 789600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 131744/1666666 [31:12<1:58:31, 215.84it/s]

finished frames 790200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 131832/1666666 [31:12<1:58:50, 215.24it/s]

finished frames 790800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 131942/1666666 [31:12<1:58:36, 215.67it/s]

finished frames 791400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 132030/1666666 [31:13<2:01:23, 210.69it/s]

finished frames 792000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 132140/1666666 [31:13<1:58:53, 215.12it/s]

finished frames 792600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 132228/1666666 [31:14<1:58:46, 215.31it/s]

finished frames 793200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 132338/1666666 [31:14<1:58:35, 215.65it/s]

finished frames 793800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 132426/1666666 [31:15<1:58:57, 214.95it/s]

finished frames 794400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 132536/1666666 [31:15<1:59:09, 214.59it/s]

finished frames 795000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 132624/1666666 [31:16<1:58:53, 215.05it/s]

finished frames 795600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 132734/1666666 [31:16<1:58:30, 215.73it/s]

finished frames 796200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 132844/1666666 [31:17<1:58:15, 216.15it/s]

finished frames 796800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 132932/1666666 [31:17<1:58:35, 215.56it/s]

finished frames 797400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 133042/1666666 [31:18<2:01:03, 211.13it/s]

finished frames 798000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 133130/1666666 [31:18<1:59:57, 213.05it/s]

finished frames 798600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 133240/1666666 [31:19<1:58:55, 214.90it/s]

finished frames 799200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 133328/1666666 [31:19<1:58:30, 215.64it/s]

finished frames 799800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 133438/1666666 [31:19<1:57:50, 216.85it/s]

finished frames 800400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 133526/1666666 [31:20<1:58:09, 216.26it/s]

finished frames 801000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 133636/1666666 [31:20<2:00:55, 211.30it/s]

finished frames 801600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 133724/1666666 [31:21<1:59:24, 213.97it/s]

finished frames 802200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 133834/1666666 [31:21<1:58:38, 215.32it/s]

finished frames 802800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 133944/1666666 [31:22<1:58:26, 215.66it/s]

finished frames 803400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 134032/1666666 [31:22<2:01:08, 210.87it/s]

finished frames 804000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 134142/1666666 [31:23<1:59:53, 213.03it/s]

finished frames 804600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 134230/1666666 [31:23<1:59:18, 214.07it/s]

finished frames 805200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 134340/1666666 [31:24<1:58:58, 214.65it/s]

finished frames 805800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 134428/1666666 [31:24<1:59:25, 213.83it/s]

finished frames 806400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 134538/1666666 [31:25<1:59:20, 213.96it/s]

finished frames 807000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 134626/1666666 [31:25<1:59:36, 213.48it/s]

finished frames 807600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 134736/1666666 [31:26<1:59:39, 213.37it/s]

finished frames 808200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 134824/1666666 [31:26<1:59:29, 213.67it/s]

finished frames 808800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 134934/1666666 [31:27<1:59:14, 214.09it/s]

finished frames 809400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 135022/1666666 [31:27<2:03:13, 207.17it/s]

finished frames 810000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 135131/1666666 [31:27<2:00:47, 211.32it/s]

finished frames 810600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 135241/1666666 [31:28<1:59:47, 213.07it/s]

finished frames 811200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 135329/1666666 [31:28<1:59:34, 213.43it/s]

finished frames 811800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 135439/1666666 [31:29<1:59:21, 213.82it/s]

finished frames 812400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 135527/1666666 [31:29<1:59:36, 213.34it/s]

finished frames 813000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 135637/1666666 [31:30<1:59:20, 213.82it/s]

finished frames 813600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 135725/1666666 [31:30<1:59:31, 213.47it/s]

finished frames 814200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 135835/1666666 [31:31<1:59:29, 213.51it/s]

finished frames 814800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 135923/1666666 [31:31<1:58:57, 214.47it/s]

finished frames 815400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 136032/1666666 [31:32<2:05:12, 203.75it/s]

finished frames 816000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 136141/1666666 [31:32<2:02:46, 207.76it/s]

finished frames 816600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 136229/1666666 [31:33<2:00:28, 211.71it/s]

finished frames 817200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 136339/1666666 [31:33<1:59:41, 213.11it/s]

finished frames 817800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 136427/1666666 [31:34<2:00:49, 211.09it/s]

finished frames 818400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 136536/1666666 [31:34<2:00:46, 211.16it/s]

finished frames 819000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 136624/1666666 [31:35<2:01:29, 209.89it/s]

finished frames 819600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 136732/1666666 [31:35<2:01:11, 210.40it/s]

finished frames 820200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 136840/1666666 [31:36<2:01:30, 209.84it/s]

finished frames 820800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 136927/1666666 [31:36<2:01:06, 210.51it/s]

finished frames 821400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 137035/1666666 [31:37<2:03:39, 206.16it/s]

finished frames 822000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 137122/1666666 [31:37<2:02:06, 208.76it/s]

finished frames 822600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 137229/1666666 [31:37<2:01:58, 208.98it/s]

finished frames 823200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 137337/1666666 [31:38<2:01:18, 210.13it/s]

finished frames 823800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 137423/1666666 [31:38<2:01:33, 209.66it/s]

finished frames 824400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 137529/1666666 [31:39<2:01:41, 209.43it/s]

finished frames 825000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 137635/1666666 [31:39<2:01:58, 208.93it/s]

finished frames 825600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 137741/1666666 [31:40<2:01:54, 209.04it/s]

finished frames 826200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 137826/1666666 [31:40<2:01:55, 208.98it/s]

finished frames 826800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 137931/1666666 [31:41<2:02:00, 208.82it/s]

finished frames 827400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 138037/1666666 [31:41<2:04:14, 205.07it/s]

finished frames 828000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 138142/1666666 [31:42<2:02:35, 207.80it/s]

finished frames 828600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 138226/1666666 [31:42<2:02:43, 207.58it/s]

finished frames 829200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 138331/1666666 [31:43<2:12:34, 192.13it/s]

finished frames 829800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 138436/1666666 [31:43<2:03:43, 205.86it/s]

finished frames 830400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 138541/1666666 [31:44<2:02:25, 208.04it/s]

finished frames 831000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 138625/1666666 [31:44<2:03:43, 205.83it/s]

finished frames 831600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 138732/1666666 [31:45<2:01:53, 208.92it/s]

finished frames 832200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 138838/1666666 [31:45<2:01:46, 209.11it/s]

finished frames 832800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 138922/1666666 [31:46<2:02:18, 208.18it/s]

finished frames 833400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 139028/1666666 [31:46<2:04:23, 204.69it/s]

finished frames 834000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 139134/1666666 [31:47<2:02:37, 207.61it/s]

finished frames 834600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 139242/1666666 [31:47<2:00:23, 211.44it/s]

finished frames 835200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 139330/1666666 [31:48<1:59:32, 212.93it/s]

finished frames 835800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 139440/1666666 [31:48<1:59:06, 213.70it/s]

finished frames 836400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 139528/1666666 [31:49<1:58:36, 214.60it/s]

finished frames 837000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 139638/1666666 [31:49<1:58:42, 214.39it/s]

finished frames 837600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 139726/1666666 [31:49<1:59:16, 213.36it/s]

finished frames 838200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 139836/1666666 [31:50<1:58:58, 213.88it/s]

finished frames 838800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 139924/1666666 [31:50<1:58:42, 214.34it/s]

finished frames 839400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 140034/1666666 [31:51<2:00:39, 210.88it/s]

finished frames 840000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 140144/1666666 [31:51<1:58:06, 215.41it/s]

finished frames 840600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 140232/1666666 [31:52<1:58:01, 215.55it/s]

finished frames 841200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 140342/1666666 [31:52<1:57:53, 215.79it/s]

finished frames 841800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 140430/1666666 [31:53<1:58:04, 215.45it/s]

finished frames 842400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 140540/1666666 [31:53<1:58:22, 214.88it/s]

finished frames 843000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 140629/1666666 [31:54<1:57:16, 216.88it/s]

finished frames 843600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 140739/1666666 [31:54<1:59:50, 212.21it/s]

finished frames 844200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 140827/1666666 [31:55<1:58:38, 214.36it/s]

finished frames 844800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 140937/1666666 [31:55<1:58:17, 214.96it/s]

finished frames 845400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 141025/1666666 [31:56<2:00:45, 210.58it/s]

finished frames 846000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 141135/1666666 [31:56<2:01:38, 209.02it/s]

finished frames 846600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 141223/1666666 [31:56<1:59:03, 213.53it/s]

finished frames 847200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 141333/1666666 [31:57<1:58:09, 215.16it/s]

finished frames 847800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 141443/1666666 [31:58<1:58:05, 215.25it/s]

finished frames 848400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 141531/1666666 [31:58<1:58:03, 215.32it/s]

finished frames 849000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  8%|▊         | 141641/1666666 [31:58<1:58:08, 215.16it/s]

finished frames 849600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▊         | 141729/1666666 [31:59<1:58:04, 215.25it/s]

finished frames 850200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▊         | 141839/1666666 [31:59<1:58:01, 215.32it/s]

finished frames 850800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▊         | 141927/1666666 [32:00<1:58:00, 215.34it/s]

finished frames 851400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▊         | 142037/1666666 [32:00<2:00:54, 210.15it/s]

finished frames 852000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▊         | 142125/1666666 [32:01<1:58:59, 213.53it/s]

finished frames 852600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▊         | 142235/1666666 [32:01<1:58:28, 214.45it/s]

finished frames 853200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▊         | 142323/1666666 [32:02<1:57:55, 215.43it/s]

finished frames 853800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▊         | 142433/1666666 [32:02<1:58:25, 214.52it/s]

finished frames 854400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▊         | 142543/1666666 [32:03<1:57:58, 215.32it/s]

finished frames 855000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▊         | 142631/1666666 [32:03<1:57:21, 216.43it/s]

finished frames 855600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▊         | 142741/1666666 [32:04<1:56:51, 217.34it/s]

finished frames 856200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▊         | 142829/1666666 [32:04<1:57:08, 216.80it/s]

finished frames 856800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▊         | 142939/1666666 [32:04<1:57:11, 216.70it/s]

finished frames 857400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▊         | 143027/1666666 [32:05<1:59:47, 211.97it/s]

finished frames 858000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▊         | 143137/1666666 [32:05<2:01:04, 209.72it/s]

finished frames 858600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▊         | 143225/1666666 [32:06<2:05:11, 202.83it/s]

finished frames 859200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▊         | 143335/1666666 [32:06<1:58:17, 214.64it/s]

finished frames 859800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▊         | 143423/1666666 [32:07<1:57:17, 216.45it/s]

finished frames 860400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▊         | 143533/1666666 [32:07<1:56:53, 217.18it/s]

finished frames 861000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▊         | 143643/1666666 [32:08<1:57:37, 215.80it/s]

finished frames 861600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▊         | 143731/1666666 [32:08<1:56:48, 217.29it/s]

finished frames 862200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▊         | 143841/1666666 [32:09<1:56:34, 217.71it/s]

finished frames 862800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▊         | 143929/1666666 [32:09<1:56:52, 217.13it/s]

finished frames 863400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▊         | 144039/1666666 [32:10<1:59:11, 212.92it/s]

finished frames 864000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▊         | 144127/1666666 [32:10<1:57:18, 216.30it/s]

finished frames 864600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▊         | 144237/1666666 [32:11<1:56:48, 217.23it/s]

finished frames 865200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▊         | 144325/1666666 [32:11<1:56:38, 217.53it/s]

finished frames 865800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▊         | 144435/1666666 [32:11<1:56:29, 217.80it/s]

finished frames 866400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▊         | 144523/1666666 [32:12<1:56:28, 217.81it/s]

finished frames 867000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▊         | 144633/1666666 [32:12<1:56:48, 217.15it/s]

finished frames 867600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▊         | 144743/1666666 [32:13<1:56:47, 217.18it/s]

finished frames 868200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▊         | 144831/1666666 [32:13<1:56:55, 216.93it/s]

finished frames 868800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▊         | 144941/1666666 [32:14<1:56:41, 217.36it/s]

finished frames 869400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▊         | 145029/1666666 [32:14<1:59:29, 212.23it/s]

finished frames 870000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▊         | 145139/1666666 [32:15<1:57:40, 215.49it/s]

finished frames 870600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▊         | 145227/1666666 [32:15<1:57:25, 215.93it/s]

finished frames 871200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▊         | 145337/1666666 [32:16<1:57:25, 215.93it/s]

finished frames 871800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▊         | 145425/1666666 [32:16<1:57:22, 216.00it/s]

finished frames 872400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▊         | 145535/1666666 [32:17<1:56:54, 216.86it/s]

finished frames 873000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▊         | 145623/1666666 [32:17<1:56:28, 217.66it/s]

finished frames 873600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▊         | 145733/1666666 [32:17<1:56:56, 216.77it/s]

finished frames 874200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 145843/1666666 [32:18<1:56:57, 216.73it/s]

finished frames 874800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 145931/1666666 [32:18<1:56:44, 217.10it/s]

finished frames 875400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 146041/1666666 [32:19<1:59:30, 212.05it/s]

finished frames 876000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 146129/1666666 [32:19<1:58:00, 214.74it/s]

finished frames 876600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 146239/1666666 [32:20<1:56:49, 216.90it/s]

finished frames 877200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 146327/1666666 [32:20<1:56:44, 217.04it/s]

finished frames 877800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 146437/1666666 [32:21<1:56:49, 216.87it/s]

finished frames 878400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 146525/1666666 [32:21<1:56:53, 216.74it/s]

finished frames 879000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 146635/1666666 [32:22<1:56:48, 216.89it/s]

finished frames 879600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 146723/1666666 [32:22<1:56:23, 217.66it/s]

finished frames 880200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 146833/1666666 [32:23<1:56:34, 217.29it/s]

finished frames 880800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 146943/1666666 [32:23<1:56:29, 217.44it/s]

finished frames 881400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 147031/1666666 [32:24<1:59:19, 212.25it/s]

finished frames 882000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 147141/1666666 [32:24<1:56:50, 216.74it/s]

finished frames 882600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 147229/1666666 [32:24<1:56:16, 217.80it/s]

finished frames 883200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 147339/1666666 [32:25<1:56:13, 217.86it/s]

finished frames 883800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 147427/1666666 [32:25<1:56:17, 217.72it/s]

finished frames 884400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 147537/1666666 [32:26<1:56:14, 217.81it/s]

finished frames 885000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 147625/1666666 [32:26<1:56:26, 217.42it/s]

finished frames 885600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 147735/1666666 [32:27<1:56:19, 217.64it/s]

finished frames 886200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 147823/1666666 [32:27<1:56:35, 217.11it/s]

finished frames 886800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 147933/1666666 [32:28<1:56:22, 217.52it/s]

finished frames 887400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 148021/1666666 [32:28<2:00:08, 210.68it/s]

finished frames 888000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 148131/1666666 [32:29<2:00:44, 209.60it/s]

finished frames 888600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 148241/1666666 [32:29<1:57:07, 216.06it/s]

finished frames 889200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 148329/1666666 [32:30<1:56:47, 216.66it/s]

finished frames 889800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 148439/1666666 [32:30<1:56:26, 217.32it/s]

finished frames 890400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 148527/1666666 [32:30<1:56:27, 217.25it/s]

finished frames 891000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 148637/1666666 [32:31<1:56:17, 217.56it/s]

finished frames 891600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 148725/1666666 [32:31<1:56:16, 217.57it/s]

finished frames 892200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 148835/1666666 [32:32<1:56:27, 217.23it/s]

finished frames 892800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 148923/1666666 [32:32<1:56:42, 216.75it/s]

finished frames 893400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 149033/1666666 [32:33<1:59:15, 212.10it/s]

finished frames 894000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 149143/1666666 [32:33<1:56:40, 216.77it/s]

finished frames 894600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 149231/1666666 [32:34<1:56:22, 217.33it/s]

finished frames 895200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 149341/1666666 [32:34<1:56:22, 217.29it/s]

finished frames 895800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 149429/1666666 [32:35<1:56:00, 217.97it/s]

finished frames 896400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 149539/1666666 [32:35<1:56:02, 217.92it/s]

finished frames 897000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 149627/1666666 [32:36<1:56:14, 217.50it/s]

finished frames 897600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 149737/1666666 [32:36<1:56:08, 217.67it/s]

finished frames 898200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 149825/1666666 [32:36<1:56:25, 217.14it/s]

finished frames 898800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 149935/1666666 [32:37<1:56:09, 217.61it/s]

finished frames 899400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 150023/1666666 [32:37<1:58:40, 212.99it/s]

finished frames 900000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 150133/1666666 [32:38<1:56:37, 216.73it/s]

finished frames 900600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 150243/1666666 [32:38<1:56:19, 217.26it/s]

finished frames 901200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 150331/1666666 [32:39<1:56:25, 217.07it/s]

finished frames 901800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 150441/1666666 [32:39<1:59:47, 210.96it/s]

finished frames 902400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 150529/1666666 [32:40<2:02:27, 206.35it/s]

finished frames 903000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 150639/1666666 [32:40<1:57:09, 215.66it/s]

finished frames 903600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 150727/1666666 [32:41<1:56:32, 216.79it/s]

finished frames 904200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 150837/1666666 [32:41<1:56:16, 217.29it/s]

finished frames 904800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 150925/1666666 [32:42<1:56:29, 216.85it/s]

finished frames 905400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 151035/1666666 [32:42<1:58:51, 212.51it/s]

finished frames 906000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 151123/1666666 [32:43<1:56:54, 216.07it/s]

finished frames 906600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 151233/1666666 [32:43<1:56:12, 217.35it/s]

finished frames 907200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 151343/1666666 [32:44<1:56:02, 217.65it/s]

finished frames 907800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 151431/1666666 [32:44<1:56:04, 217.57it/s]

finished frames 908400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 151541/1666666 [32:44<1:56:04, 217.55it/s]

finished frames 909000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 151629/1666666 [32:45<1:56:05, 217.51it/s]

finished frames 909600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 151739/1666666 [32:45<1:56:12, 217.26it/s]

finished frames 910200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 151827/1666666 [32:46<1:56:18, 217.07it/s]

finished frames 910800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 151937/1666666 [32:46<1:56:11, 217.28it/s]

finished frames 911400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 152025/1666666 [32:47<1:59:09, 211.84it/s]

finished frames 912000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 152135/1666666 [32:47<1:56:26, 216.78it/s]

finished frames 912600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 152223/1666666 [32:48<1:56:05, 217.41it/s]

finished frames 913200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 152333/1666666 [32:48<1:56:05, 217.40it/s]

finished frames 913800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 152443/1666666 [32:49<1:55:53, 217.76it/s]

finished frames 914400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 152531/1666666 [32:49<1:55:58, 217.59it/s]

finished frames 915000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 152641/1666666 [32:50<1:56:09, 217.23it/s]

finished frames 915600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 152729/1666666 [32:50<1:56:11, 217.15it/s]

finished frames 916200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 152839/1666666 [32:50<2:01:11, 208.19it/s]

finished frames 916800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 152927/1666666 [32:51<2:07:35, 197.72it/s]

finished frames 917400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 153037/1666666 [32:51<2:00:14, 209.82it/s]

finished frames 918000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 153125/1666666 [32:52<1:57:04, 215.47it/s]

finished frames 918600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 153235/1666666 [32:52<1:56:13, 217.04it/s]

finished frames 919200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 153323/1666666 [32:53<1:56:06, 217.25it/s]

finished frames 919800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 153433/1666666 [32:53<1:56:02, 217.35it/s]

finished frames 920400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 153543/1666666 [32:54<1:55:47, 217.79it/s]

finished frames 921000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 153631/1666666 [32:54<1:55:53, 217.60it/s]

finished frames 921600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 153741/1666666 [32:55<1:55:57, 217.46it/s]

finished frames 922200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 153829/1666666 [32:55<1:56:17, 216.83it/s]

finished frames 922800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 153939/1666666 [32:56<1:56:05, 217.18it/s]

finished frames 923400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 154027/1666666 [32:56<1:58:03, 213.56it/s]

finished frames 924000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 154140/1666666 [32:57<1:55:07, 218.97it/s]

finished frames 924600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 154228/1666666 [32:57<1:55:40, 217.92it/s]

finished frames 925200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 154338/1666666 [32:57<1:55:52, 217.53it/s]

finished frames 925800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 154426/1666666 [32:58<1:55:58, 217.32it/s]

finished frames 926400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 154536/1666666 [32:58<1:55:50, 217.55it/s]

finished frames 927000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 154624/1666666 [32:59<1:55:53, 217.44it/s]

finished frames 927600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 154734/1666666 [32:59<1:56:08, 216.96it/s]

finished frames 928200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 154844/1666666 [33:00<1:56:01, 217.17it/s]

finished frames 928800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 154932/1666666 [33:00<1:56:06, 216.99it/s]

finished frames 929400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 155042/1666666 [33:01<1:58:34, 212.46it/s]

finished frames 930000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 155130/1666666 [33:01<1:56:50, 215.63it/s]

finished frames 930600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 155240/1666666 [33:02<1:56:10, 216.83it/s]

finished frames 931200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 155328/1666666 [33:02<1:56:08, 216.89it/s]

finished frames 931800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 155438/1666666 [33:03<1:59:05, 211.48it/s]

finished frames 932400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 155526/1666666 [33:03<1:56:38, 215.93it/s]

finished frames 933000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 155636/1666666 [33:03<1:55:58, 217.16it/s]

finished frames 933600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 155724/1666666 [33:04<1:55:59, 217.10it/s]

finished frames 934200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 155834/1666666 [33:04<1:55:55, 217.22it/s]

finished frames 934800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 155944/1666666 [33:05<1:55:46, 217.48it/s]

finished frames 935400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 156032/1666666 [33:05<1:58:19, 212.77it/s]

finished frames 936000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 156142/1666666 [33:06<1:56:11, 216.68it/s]

finished frames 936600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 156230/1666666 [33:06<1:55:51, 217.28it/s]

finished frames 937200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 156340/1666666 [33:07<1:55:41, 217.58it/s]

finished frames 937800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 156428/1666666 [33:07<1:55:57, 217.06it/s]

finished frames 938400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 156538/1666666 [33:08<1:55:54, 217.15it/s]

finished frames 939000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 156626/1666666 [33:08<1:56:01, 216.93it/s]

finished frames 939600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 156736/1666666 [33:09<1:55:45, 217.41it/s]

finished frames 940200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 156824/1666666 [33:09<1:55:59, 216.94it/s]

finished frames 940800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 156934/1666666 [33:09<1:55:57, 217.00it/s]

finished frames 941400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 157022/1666666 [33:10<1:59:43, 210.17it/s]

finished frames 942000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 157132/1666666 [33:10<1:56:33, 215.85it/s]

finished frames 942600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 157242/1666666 [33:11<1:55:57, 216.94it/s]

finished frames 943200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 157330/1666666 [33:11<1:55:58, 216.92it/s]

finished frames 943800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 157440/1666666 [33:12<1:55:53, 217.05it/s]

finished frames 944400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 157528/1666666 [33:12<1:55:59, 216.83it/s]

finished frames 945000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 157638/1666666 [33:13<1:55:47, 217.20it/s]

finished frames 945600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 157726/1666666 [33:13<1:59:21, 210.69it/s]

finished frames 946200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 157836/1666666 [33:14<2:03:32, 203.55it/s]

finished frames 946800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 157924/1666666 [33:14<1:57:50, 213.39it/s]

finished frames 947400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 158034/1666666 [33:15<1:58:38, 211.94it/s]

finished frames 948000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 158144/1666666 [33:15<1:56:10, 216.41it/s]

finished frames 948600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


  9%|▉         | 158232/1666666 [33:16<1:56:05, 216.57it/s]

finished frames 949200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 158342/1666666 [33:16<1:55:40, 217.32it/s]

finished frames 949800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 158430/1666666 [33:16<1:56:02, 216.63it/s]

finished frames 950400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 158540/1666666 [33:17<1:55:48, 217.05it/s]

finished frames 951000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 158628/1666666 [33:17<1:55:40, 217.29it/s]

finished frames 951600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 158738/1666666 [33:18<1:55:45, 217.11it/s]

finished frames 952200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 158826/1666666 [33:18<1:55:38, 217.30it/s]

finished frames 952800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 158936/1666666 [33:19<1:55:42, 217.17it/s]

finished frames 953400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 159024/1666666 [33:19<1:58:05, 212.79it/s]

finished frames 954000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 159134/1666666 [33:20<1:56:12, 216.21it/s]

finished frames 954600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 159244/1666666 [33:20<1:55:39, 217.24it/s]

finished frames 955200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 159332/1666666 [33:21<1:55:46, 216.99it/s]

finished frames 955800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 159442/1666666 [33:21<1:55:40, 217.16it/s]

finished frames 956400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 159530/1666666 [33:22<1:55:49, 216.88it/s]

finished frames 957000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 159640/1666666 [33:22<1:55:25, 217.60it/s]

finished frames 957600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 159728/1666666 [33:23<1:55:32, 217.37it/s]

finished frames 958200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 159838/1666666 [33:23<1:55:40, 217.11it/s]

finished frames 958800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 159926/1666666 [33:23<1:55:43, 217.01it/s]

finished frames 959400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 160036/1666666 [33:24<1:58:22, 212.11it/s]

finished frames 960000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 160124/1666666 [33:24<2:00:11, 208.90it/s]

finished frames 960600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 160234/1666666 [33:25<2:06:35, 198.35it/s]

finished frames 961200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 160344/1666666 [33:25<1:57:28, 213.71it/s]

finished frames 961800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 160432/1666666 [33:26<1:56:17, 215.88it/s]

finished frames 962400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 160542/1666666 [33:26<1:55:35, 217.17it/s]

finished frames 963000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 160630/1666666 [33:27<1:55:47, 216.78it/s]

finished frames 963600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 160740/1666666 [33:27<1:56:16, 215.86it/s]

finished frames 964200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 160828/1666666 [33:28<1:56:18, 215.80it/s]

finished frames 964800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 160938/1666666 [33:28<1:56:04, 216.21it/s]

finished frames 965400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 161026/1666666 [33:29<2:00:03, 209.01it/s]

finished frames 966000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 161136/1666666 [33:29<1:56:25, 215.51it/s]

finished frames 966600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 161224/1666666 [33:30<1:56:18, 215.71it/s]

finished frames 967200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 161334/1666666 [33:30<1:56:30, 215.34it/s]

finished frames 967800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 161444/1666666 [33:31<1:56:31, 215.28it/s]

finished frames 968400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 161532/1666666 [33:31<1:56:36, 215.12it/s]

finished frames 969000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 161642/1666666 [33:31<1:56:28, 215.35it/s]

finished frames 969600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 161730/1666666 [33:32<1:56:30, 215.29it/s]

finished frames 970200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 161840/1666666 [33:32<1:56:04, 216.08it/s]

finished frames 970800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 161928/1666666 [33:33<1:56:16, 215.69it/s]

finished frames 971400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 162038/1666666 [33:33<1:58:16, 212.02it/s]

finished frames 972000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 162126/1666666 [33:34<1:56:26, 215.35it/s]

finished frames 972600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 162236/1666666 [33:34<1:56:23, 215.43it/s]

finished frames 973200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 162324/1666666 [33:35<1:56:09, 215.84it/s]

finished frames 973800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 162434/1666666 [33:35<1:56:16, 215.60it/s]

finished frames 974400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 162544/1666666 [33:36<2:00:29, 208.04it/s]

finished frames 975000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 162631/1666666 [33:36<1:58:39, 211.27it/s]

finished frames 975600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 162741/1666666 [33:37<1:57:37, 213.11it/s]

finished frames 976200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 162829/1666666 [33:37<1:57:37, 213.09it/s]

finished frames 976800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 162939/1666666 [33:38<1:57:56, 212.49it/s]

finished frames 977400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 163027/1666666 [33:38<1:59:42, 209.34it/s]

finished frames 978000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 163137/1666666 [33:39<1:56:52, 214.41it/s]

finished frames 978600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 163225/1666666 [33:39<1:56:23, 215.27it/s]

finished frames 979200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 163335/1666666 [33:39<1:56:06, 215.79it/s]

finished frames 979800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 163423/1666666 [33:40<1:56:11, 215.63it/s]

finished frames 980400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 163533/1666666 [33:40<1:56:11, 215.62it/s]

finished frames 981000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 163643/1666666 [33:41<1:56:14, 215.52it/s]

finished frames 981600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 163731/1666666 [33:41<1:56:12, 215.54it/s]

finished frames 982200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 163841/1666666 [33:42<1:58:45, 210.92it/s]

finished frames 982800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 163926/1666666 [33:42<1:59:44, 209.16it/s]

finished frames 983400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 164032/1666666 [33:43<2:02:43, 204.07it/s]

finished frames 984000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 164138/1666666 [33:43<2:00:17, 208.18it/s]

finished frames 984600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 164222/1666666 [33:44<1:59:54, 208.83it/s]

finished frames 985200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 164330/1666666 [33:44<1:59:18, 209.86it/s]

finished frames 985800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 164438/1666666 [33:45<1:58:52, 210.62it/s]

finished frames 986400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 164526/1666666 [33:45<1:58:59, 210.41it/s]

finished frames 987000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 164634/1666666 [33:46<1:58:58, 210.41it/s]

finished frames 987600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 164743/1666666 [33:46<1:59:09, 210.07it/s]

finished frames 988200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 164829/1666666 [33:47<1:59:47, 208.96it/s]

finished frames 988800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 164937/1666666 [33:47<2:02:26, 204.42it/s]

finished frames 989400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 165022/1666666 [33:48<2:12:24, 189.02it/s]

finished frames 990000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 165130/1666666 [33:48<2:00:23, 207.86it/s]

finished frames 990600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 165239/1666666 [33:49<1:58:34, 211.03it/s]

finished frames 991200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 165327/1666666 [33:49<1:57:26, 213.06it/s]

finished frames 991800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 165437/1666666 [33:50<1:57:42, 212.58it/s]

finished frames 992400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 165525/1666666 [33:50<1:57:32, 212.85it/s]

finished frames 993000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 165635/1666666 [33:50<1:57:24, 213.09it/s]

finished frames 993600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 165723/1666666 [33:51<1:57:49, 212.31it/s]

finished frames 994200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 165833/1666666 [33:51<1:57:54, 212.16it/s]

finished frames 994800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 165943/1666666 [33:52<1:58:03, 211.87it/s]

finished frames 995400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 166029/1666666 [33:52<2:01:03, 206.60it/s]

finished frames 996000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 166138/1666666 [33:53<1:58:42, 210.66it/s]

finished frames 996600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 166222/1666666 [33:53<2:13:31, 187.28it/s]

finished frames 997200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 166332/1666666 [33:54<2:00:06, 208.20it/s]

finished frames 997800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 166441/1666666 [33:54<2:00:29, 207.51it/s]

finished frames 998400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 166528/1666666 [33:55<1:59:10, 209.78it/s]

finished frames 999000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|▉         | 166637/1666666 [33:55<1:58:43, 210.57it/s]

finished frames 999600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 166725/1666666 [33:56<1:58:43, 210.58it/s]

finished frames 1000200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 166833/1666666 [33:56<2:02:43, 203.68it/s]

finished frames 1000800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 166942/1666666 [33:57<1:59:10, 209.72it/s]

finished frames 1001400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 167026/1666666 [33:57<2:02:00, 204.85it/s]

finished frames 1002000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 167131/1666666 [33:58<2:00:09, 208.00it/s]

finished frames 1002600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 167237/1666666 [33:58<2:04:24, 200.86it/s]

finished frames 1003200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 167321/1666666 [33:59<2:11:44, 189.68it/s]

finished frames 1003800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 167426/1666666 [33:59<2:01:41, 205.34it/s]

finished frames 1004400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 167536/1666666 [34:00<1:57:40, 212.33it/s]

finished frames 1005000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 167624/1666666 [34:00<1:56:48, 213.89it/s]

finished frames 1005600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 167734/1666666 [34:01<1:56:26, 214.53it/s]

finished frames 1006200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 167844/1666666 [34:01<1:56:14, 214.89it/s]

finished frames 1006800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 167932/1666666 [34:02<1:57:05, 213.34it/s]

finished frames 1007400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 168042/1666666 [34:02<1:58:58, 209.94it/s]

finished frames 1008000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 168130/1666666 [34:02<1:57:01, 213.41it/s]

finished frames 1008600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 168240/1666666 [34:03<1:56:18, 214.71it/s]

finished frames 1009200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 168328/1666666 [34:03<1:57:06, 213.24it/s]

finished frames 1009800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 168438/1666666 [34:04<1:57:03, 213.32it/s]

finished frames 1010400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 168526/1666666 [34:04<1:56:34, 214.19it/s]

finished frames 1011000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 168636/1666666 [34:05<1:56:27, 214.40it/s]

finished frames 1011600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 168724/1666666 [34:05<1:56:48, 213.75it/s]

finished frames 1012200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 168834/1666666 [34:06<1:56:29, 214.29it/s]

finished frames 1012800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 168944/1666666 [34:06<1:56:18, 214.61it/s]

finished frames 1013400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 169032/1666666 [34:07<1:58:56, 209.86it/s]

finished frames 1014000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 169142/1666666 [34:07<1:56:46, 213.73it/s]

finished frames 1014600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 169230/1666666 [34:08<1:56:26, 214.32it/s]

finished frames 1015200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 169340/1666666 [34:08<1:56:43, 213.81it/s]

finished frames 1015800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 169428/1666666 [34:09<1:56:32, 214.11it/s]

finished frames 1016400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 169538/1666666 [34:09<1:56:01, 215.05it/s]

finished frames 1017000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 169626/1666666 [34:10<2:06:08, 197.81it/s]

finished frames 1017600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 169735/1666666 [34:10<2:03:34, 201.90it/s]

finished frames 1018200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 169823/1666666 [34:10<1:57:49, 211.72it/s]

finished frames 1018800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 169933/1666666 [34:11<1:56:10, 214.71it/s]

finished frames 1019400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 170021/1666666 [34:11<1:59:30, 208.71it/s]

finished frames 1020000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 170131/1666666 [34:12<1:56:29, 214.13it/s]

finished frames 1020600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 170241/1666666 [34:12<1:56:23, 214.28it/s]

finished frames 1021200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 170329/1666666 [34:13<1:57:23, 212.43it/s]

finished frames 1021800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 170439/1666666 [34:13<1:57:25, 212.37it/s]

finished frames 1022400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 170527/1666666 [34:14<1:57:40, 211.91it/s]

finished frames 1023000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 170637/1666666 [34:14<1:57:31, 212.16it/s]

finished frames 1023600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 170725/1666666 [34:15<1:57:32, 212.12it/s]

finished frames 1024200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 170835/1666666 [34:15<1:58:17, 210.75it/s]

finished frames 1024800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 170943/1666666 [34:16<1:58:51, 209.75it/s]

finished frames 1025400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 171028/1666666 [34:16<2:01:43, 204.79it/s]

finished frames 1026000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 171137/1666666 [34:17<1:58:49, 209.77it/s]

finished frames 1026600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 171223/1666666 [34:17<1:58:37, 210.10it/s]

finished frames 1027200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 171332/1666666 [34:18<1:58:43, 209.90it/s]

finished frames 1027800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 171438/1666666 [34:18<1:58:53, 209.61it/s]

finished frames 1028400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 171522/1666666 [34:19<1:59:16, 208.91it/s]

finished frames 1029000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 171628/1666666 [34:19<1:59:03, 209.28it/s]

finished frames 1029600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 171734/1666666 [34:20<1:58:53, 209.57it/s]

finished frames 1030200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 171841/1666666 [34:20<1:58:53, 209.56it/s]

finished frames 1030800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 171926/1666666 [34:20<1:59:01, 209.30it/s]

finished frames 1031400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 172030/1666666 [34:21<2:07:23, 195.53it/s]

finished frames 1032000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 172134/1666666 [34:22<2:03:23, 201.86it/s]

finished frames 1032600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 172240/1666666 [34:22<1:59:52, 207.78it/s]

finished frames 1033200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 172324/1666666 [34:22<1:59:30, 208.41it/s]

finished frames 1033800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 172429/1666666 [34:23<1:59:38, 208.15it/s]

finished frames 1034400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 172534/1666666 [34:23<1:59:35, 208.22it/s]

finished frames 1035000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 172639/1666666 [34:24<1:59:25, 208.49it/s]

finished frames 1035600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 172725/1666666 [34:24<1:59:21, 208.60it/s]

finished frames 1036200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 172831/1666666 [34:25<1:59:00, 209.21it/s]

finished frames 1036800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 172937/1666666 [34:25<1:58:55, 209.32it/s]

finished frames 1037400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 173022/1666666 [34:26<2:02:24, 203.36it/s]

finished frames 1038000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 173128/1666666 [34:26<2:00:02, 207.37it/s]

finished frames 1038600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 173234/1666666 [34:27<1:59:06, 208.97it/s]

finished frames 1039200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 173341/1666666 [34:27<1:58:51, 209.39it/s]

finished frames 1039800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 173426/1666666 [34:28<1:59:25, 208.38it/s]

finished frames 1040400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 173531/1666666 [34:28<1:59:20, 208.54it/s]

finished frames 1041000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 173637/1666666 [34:29<1:59:01, 209.07it/s]

finished frames 1041600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 173742/1666666 [34:29<1:59:25, 208.34it/s]

finished frames 1042200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 173826/1666666 [34:30<1:59:20, 208.47it/s]

finished frames 1042800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 173933/1666666 [34:30<1:58:59, 209.07it/s]

finished frames 1043400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 174039/1666666 [34:31<2:01:50, 204.18it/s]

finished frames 1044000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 174123/1666666 [34:31<2:00:06, 207.10it/s]

finished frames 1044600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 174228/1666666 [34:32<1:59:24, 208.30it/s]

finished frames 1045200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 174333/1666666 [34:32<2:06:42, 196.30it/s]

finished frames 1045800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 174438/1666666 [34:33<2:04:36, 199.58it/s]

finished frames 1046400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 174522/1666666 [34:33<2:01:12, 205.19it/s]

finished frames 1047000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 174627/1666666 [34:34<2:00:26, 206.48it/s]

finished frames 1047600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 174735/1666666 [34:34<1:58:43, 209.43it/s]

finished frames 1048200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 174822/1666666 [34:35<1:58:24, 210.00it/s]

finished frames 1048800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 10%|█         | 174931/1666666 [34:35<1:57:52, 210.92it/s]

finished frames 1049400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 175041/1666666 [34:36<1:58:32, 209.73it/s]

finished frames 1050000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 175128/1666666 [34:36<1:56:01, 214.24it/s]

finished frames 1050600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 175238/1666666 [34:37<1:55:17, 215.59it/s]

finished frames 1051200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 175326/1666666 [34:37<1:55:16, 215.63it/s]

finished frames 1051800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 175436/1666666 [34:37<1:55:18, 215.55it/s]

finished frames 1052400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 175524/1666666 [34:38<1:55:24, 215.35it/s]

finished frames 1053000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 175634/1666666 [34:38<1:54:57, 216.17it/s]

finished frames 1053600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 175744/1666666 [34:39<1:54:47, 216.48it/s]

finished frames 1054200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 175832/1666666 [34:39<1:55:01, 216.02it/s]

finished frames 1054800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 175942/1666666 [34:40<1:54:52, 216.28it/s]

finished frames 1055400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 176029/1666666 [34:40<2:00:59, 205.32it/s]

finished frames 1056000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 176134/1666666 [34:41<2:00:42, 205.81it/s]

finished frames 1056600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 176239/1666666 [34:41<1:59:26, 207.96it/s]

finished frames 1057200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 176323/1666666 [34:42<1:59:11, 208.40it/s]

finished frames 1057800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 176428/1666666 [34:42<1:59:20, 208.13it/s]

finished frames 1058400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 176533/1666666 [34:43<1:59:01, 208.64it/s]

finished frames 1059000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 176638/1666666 [34:43<1:59:23, 207.99it/s]

finished frames 1059600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 176722/1666666 [34:44<1:59:29, 207.83it/s]

finished frames 1060200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 176827/1666666 [34:44<2:02:08, 203.28it/s]

finished frames 1060800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 176932/1666666 [34:45<1:59:33, 207.68it/s]

finished frames 1061400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 177037/1666666 [34:45<2:01:36, 204.16it/s]

finished frames 1062000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 177124/1666666 [34:46<1:59:03, 208.53it/s]

finished frames 1062600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 177232/1666666 [34:46<1:58:21, 209.73it/s]

finished frames 1063200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 177341/1666666 [34:47<1:59:59, 206.85it/s]

finished frames 1063800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 177427/1666666 [34:47<1:57:30, 211.21it/s]

finished frames 1064400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 177537/1666666 [34:48<1:55:02, 215.75it/s]

finished frames 1065000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 177625/1666666 [34:48<1:55:23, 215.07it/s]

finished frames 1065600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 177735/1666666 [34:48<1:55:24, 215.03it/s]

finished frames 1066200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 177823/1666666 [34:49<1:55:07, 215.54it/s]

finished frames 1066800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 177933/1666666 [34:49<1:54:49, 216.09it/s]

finished frames 1067400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 178022/1666666 [34:50<1:58:05, 210.08it/s]

finished frames 1068000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 178132/1666666 [34:50<1:55:58, 213.91it/s]

finished frames 1068600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 178242/1666666 [34:51<1:55:01, 215.68it/s]

finished frames 1069200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 178330/1666666 [34:51<1:54:43, 216.21it/s]

finished frames 1069800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 178440/1666666 [34:52<1:54:43, 216.21it/s]

finished frames 1070400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 178528/1666666 [34:52<1:55:05, 215.50it/s]

finished frames 1071000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 178638/1666666 [34:53<1:54:46, 216.09it/s]

finished frames 1071600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 178726/1666666 [34:53<1:54:33, 216.47it/s]

finished frames 1072200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 178836/1666666 [34:54<1:54:34, 216.43it/s]

finished frames 1072800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 178924/1666666 [34:54<1:54:52, 215.84it/s]

finished frames 1073400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 179034/1666666 [34:55<2:03:58, 199.98it/s]

finished frames 1074000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 179144/1666666 [34:55<1:56:56, 212.01it/s]

finished frames 1074600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 179232/1666666 [34:55<1:55:20, 214.93it/s]

finished frames 1075200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 179342/1666666 [34:56<1:54:40, 216.16it/s]

finished frames 1075800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 179430/1666666 [34:56<1:54:58, 215.59it/s]

finished frames 1076400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 179540/1666666 [34:57<1:54:37, 216.24it/s]

finished frames 1077000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 179628/1666666 [34:57<1:54:32, 216.38it/s]

finished frames 1077600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 179738/1666666 [34:58<1:54:19, 216.77it/s]

finished frames 1078200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 179826/1666666 [34:58<1:54:57, 215.57it/s]

finished frames 1078800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 179936/1666666 [34:59<1:54:51, 215.72it/s]

finished frames 1079400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 180024/1666666 [34:59<1:57:17, 211.24it/s]

finished frames 1080000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 180134/1666666 [35:00<1:55:10, 215.10it/s]

finished frames 1080600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 180244/1666666 [35:00<1:54:29, 216.39it/s]

finished frames 1081200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 180332/1666666 [35:01<1:54:32, 216.26it/s]

finished frames 1081800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 180442/1666666 [35:01<1:54:08, 217.02it/s]

finished frames 1082400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 180530/1666666 [35:01<1:54:35, 216.16it/s]

finished frames 1083000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 180640/1666666 [35:02<1:54:29, 216.31it/s]

finished frames 1083600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 180728/1666666 [35:02<1:54:42, 215.90it/s]

finished frames 1084200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 180838/1666666 [35:03<1:54:29, 216.28it/s]

finished frames 1084800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 180926/1666666 [35:03<1:55:18, 214.74it/s]

finished frames 1085400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 181036/1666666 [35:04<1:57:15, 211.16it/s]

finished frames 1086000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 181124/1666666 [35:04<1:56:04, 213.31it/s]

finished frames 1086600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 181234/1666666 [35:05<1:55:16, 214.77it/s]

finished frames 1087200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 181344/1666666 [35:05<1:55:02, 215.18it/s]

finished frames 1087800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 181432/1666666 [35:06<2:05:22, 197.44it/s]

finished frames 1088400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 181542/1666666 [35:06<1:56:47, 211.93it/s]

finished frames 1089000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 181630/1666666 [35:07<1:59:04, 207.84it/s]

finished frames 1089600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 181739/1666666 [35:07<1:55:54, 213.51it/s]

finished frames 1090200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 181827/1666666 [35:08<1:55:18, 214.62it/s]

finished frames 1090800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 181937/1666666 [35:08<1:55:41, 213.90it/s]

finished frames 1091400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 182024/1666666 [35:09<1:58:48, 208.26it/s]

finished frames 1092000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 182130/1666666 [35:09<1:58:17, 209.15it/s]

finished frames 1092600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 182236/1666666 [35:10<1:58:19, 209.09it/s]

finished frames 1093200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 182343/1666666 [35:10<1:58:04, 209.53it/s]

finished frames 1093800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 182429/1666666 [35:10<1:57:58, 209.67it/s]

finished frames 1094400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 182535/1666666 [35:11<1:57:58, 209.66it/s]

finished frames 1095000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 182622/1666666 [35:11<1:57:38, 210.26it/s]

finished frames 1095600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 182730/1666666 [35:12<1:57:48, 209.95it/s]

finished frames 1096200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 182837/1666666 [35:12<1:57:55, 209.71it/s]

finished frames 1096800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 182923/1666666 [35:13<1:58:04, 209.44it/s]

finished frames 1097400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 183029/1666666 [35:13<2:00:28, 205.24it/s]

finished frames 1098000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 183138/1666666 [35:14<1:58:00, 209.51it/s]

finished frames 1098600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 183225/1666666 [35:14<1:57:34, 210.28it/s]

finished frames 1099200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 183335/1666666 [35:15<1:57:04, 211.15it/s]

finished frames 1099800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 183423/1666666 [35:15<1:57:34, 210.26it/s]

finished frames 1100400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 183532/1666666 [35:16<1:57:39, 210.10it/s]

finished frames 1101000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 183642/1666666 [35:16<1:57:21, 210.60it/s]

finished frames 1101600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 183728/1666666 [35:17<1:58:02, 209.38it/s]

finished frames 1102200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 183835/1666666 [35:17<2:07:33, 193.74it/s]

finished frames 1102800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 183942/1666666 [35:18<2:04:22, 198.68it/s]

finished frames 1103400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 184026/1666666 [35:18<2:02:26, 201.83it/s]

finished frames 1104000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 184131/1666666 [35:19<1:58:49, 207.93it/s]

finished frames 1104600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 184239/1666666 [35:19<1:57:54, 209.54it/s]

finished frames 1105200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 184324/1666666 [35:20<1:58:00, 209.35it/s]

finished frames 1105800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 184432/1666666 [35:20<1:57:33, 210.14it/s]

finished frames 1106400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 184541/1666666 [35:21<1:57:22, 210.45it/s]

finished frames 1107000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 184629/1666666 [35:21<1:57:18, 210.56it/s]

finished frames 1107600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 184737/1666666 [35:22<1:57:34, 210.07it/s]

finished frames 1108200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 184822/1666666 [35:22<1:57:57, 209.39it/s]

finished frames 1108800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 184929/1666666 [35:23<1:57:33, 210.08it/s]

finished frames 1109400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 185035/1666666 [35:23<2:00:25, 205.07it/s]

finished frames 1110000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 185141/1666666 [35:24<1:58:48, 207.82it/s]

finished frames 1110600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 185228/1666666 [35:24<1:56:06, 212.66it/s]

finished frames 1111200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 185338/1666666 [35:24<1:55:32, 213.69it/s]

finished frames 1111800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 185426/1666666 [35:25<1:55:22, 213.98it/s]

finished frames 1112400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 185536/1666666 [35:25<1:57:29, 210.09it/s]

finished frames 1113000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 185622/1666666 [35:26<1:58:02, 209.12it/s]

finished frames 1113600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 185729/1666666 [35:26<1:57:49, 209.48it/s]

finished frames 1114200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 185835/1666666 [35:27<1:58:16, 208.67it/s]

finished frames 1114800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 185940/1666666 [35:27<1:58:53, 207.57it/s]

finished frames 1115400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 186024/1666666 [35:28<2:01:40, 202.81it/s]

finished frames 1116000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 186130/1666666 [35:28<2:09:13, 190.95it/s]

finished frames 1116600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 186236/1666666 [35:29<1:59:39, 206.21it/s]

finished frames 1117200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 186343/1666666 [35:29<1:58:16, 208.59it/s]

finished frames 1117800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 186428/1666666 [35:30<1:58:03, 208.96it/s]

finished frames 1118400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 186537/1666666 [35:30<1:57:07, 210.62it/s]

finished frames 1119000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 186623/1666666 [35:31<1:57:46, 209.44it/s]

finished frames 1119600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 186728/1666666 [35:31<1:58:08, 208.79it/s]

finished frames 1120200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 186835/1666666 [35:32<1:57:54, 209.17it/s]

finished frames 1120800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 186943/1666666 [35:32<1:57:18, 210.23it/s]

finished frames 1121400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 187028/1666666 [35:33<2:00:31, 204.62it/s]

finished frames 1122000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 187134/1666666 [35:33<1:58:40, 207.79it/s]

finished frames 1122600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 187240/1666666 [35:34<1:58:17, 208.44it/s]

finished frames 1123200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 187327/1666666 [35:34<1:55:39, 213.17it/s]

finished frames 1123800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█         | 187437/1666666 [35:35<1:54:49, 214.71it/s]

finished frames 1124400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█▏        | 187525/1666666 [35:35<1:55:12, 213.98it/s]

finished frames 1125000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█▏        | 187635/1666666 [35:36<1:54:40, 214.96it/s]

finished frames 1125600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█▏        | 187723/1666666 [35:36<1:54:37, 215.04it/s]

finished frames 1126200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█▏        | 187833/1666666 [35:36<1:54:47, 214.71it/s]

finished frames 1126800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█▏        | 187943/1666666 [35:37<1:54:41, 214.89it/s]

finished frames 1127400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█▏        | 188031/1666666 [35:37<1:57:23, 209.92it/s]

finished frames 1128000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█▏        | 188141/1666666 [35:38<1:54:28, 215.27it/s]

finished frames 1128600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█▏        | 188229/1666666 [35:38<1:54:03, 216.05it/s]

finished frames 1129200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█▏        | 188339/1666666 [35:39<1:53:31, 217.02it/s]

finished frames 1129800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█▏        | 188427/1666666 [35:39<1:53:34, 216.91it/s]

finished frames 1130400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█▏        | 188537/1666666 [35:40<1:53:57, 216.18it/s]

finished frames 1131000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█▏        | 188625/1666666 [35:40<1:54:10, 215.74it/s]

finished frames 1131600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█▏        | 188735/1666666 [35:41<1:54:07, 215.82it/s]

finished frames 1132200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█▏        | 188823/1666666 [35:41<1:54:12, 215.66it/s]

finished frames 1132800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█▏        | 188933/1666666 [35:42<1:54:14, 215.59it/s]

finished frames 1133400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█▏        | 189021/1666666 [35:42<1:57:35, 209.43it/s]

finished frames 1134000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█▏        | 189131/1666666 [35:42<1:54:28, 215.12it/s]

finished frames 1134600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█▏        | 189241/1666666 [35:43<1:53:30, 216.93it/s]

finished frames 1135200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█▏        | 189329/1666666 [35:43<1:53:46, 216.41it/s]

finished frames 1135800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█▏        | 189439/1666666 [35:44<1:53:29, 216.94it/s]

finished frames 1136400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█▏        | 189527/1666666 [35:44<1:53:36, 216.70it/s]

finished frames 1137000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█▏        | 189637/1666666 [35:45<1:53:36, 216.67it/s]

finished frames 1137600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█▏        | 189725/1666666 [35:45<1:53:44, 216.41it/s]

finished frames 1138200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█▏        | 189835/1666666 [35:46<1:53:45, 216.37it/s]

finished frames 1138800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█▏        | 189923/1666666 [35:46<1:53:42, 216.44it/s]

finished frames 1139400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█▏        | 190033/1666666 [35:47<1:56:37, 211.01it/s]

finished frames 1140000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█▏        | 190143/1666666 [35:47<1:53:44, 216.35it/s]

finished frames 1140600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█▏        | 190231/1666666 [35:48<1:53:36, 216.58it/s]

finished frames 1141200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█▏        | 190341/1666666 [35:48<1:53:22, 217.03it/s]

finished frames 1141800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█▏        | 190429/1666666 [35:48<1:53:38, 216.49it/s]

finished frames 1142400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█▏        | 190539/1666666 [35:49<1:52:55, 217.85it/s]

finished frames 1143000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█▏        | 190627/1666666 [35:49<1:53:08, 217.43it/s]

finished frames 1143600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█▏        | 190737/1666666 [35:50<1:52:36, 218.43it/s]

finished frames 1144200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█▏        | 190825/1666666 [35:50<1:53:52, 216.00it/s]

finished frames 1144800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█▏        | 190913/1666666 [35:51<1:54:32, 214.75it/s]

finished frames 1145400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█▏        | 191023/1666666 [35:51<2:01:29, 202.44it/s]

finished frames 1146000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█▏        | 191132/1666666 [35:52<2:00:11, 204.60it/s]

finished frames 1146600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█▏        | 191241/1666666 [35:52<1:55:09, 213.54it/s]

finished frames 1147200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█▏        | 191329/1666666 [35:53<1:54:29, 214.76it/s]

finished frames 1147800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█▏        | 191439/1666666 [35:53<1:53:46, 216.09it/s]

finished frames 1148400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█▏        | 191529/1666666 [35:54<1:52:44, 218.08it/s]

finished frames 1149000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 11%|█▏        | 191640/1666666 [35:54<1:57:47, 208.71it/s]

finished frames 1149600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 191728/1666666 [35:55<1:56:36, 210.80it/s]

finished frames 1150200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 191838/1666666 [35:55<1:56:33, 210.88it/s]

finished frames 1150800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 191925/1666666 [35:56<1:57:10, 209.76it/s]

finished frames 1151400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 192034/1666666 [35:56<1:59:55, 204.93it/s]

finished frames 1152000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 192142/1666666 [35:57<1:57:31, 209.10it/s]

finished frames 1152600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 192229/1666666 [35:57<1:56:53, 210.24it/s]

finished frames 1153200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 192339/1666666 [35:58<1:56:38, 210.66it/s]

finished frames 1153800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 192427/1666666 [35:58<1:56:30, 210.89it/s]

finished frames 1154400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 192537/1666666 [35:59<1:56:09, 211.53it/s]

finished frames 1155000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 192625/1666666 [35:59<1:56:28, 210.93it/s]

finished frames 1155600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 192735/1666666 [35:59<1:56:31, 210.83it/s]

finished frames 1156200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 192822/1666666 [36:00<1:56:50, 210.25it/s]

finished frames 1156800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 192932/1666666 [36:00<1:56:29, 210.86it/s]

finished frames 1157400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 193020/1666666 [36:01<2:00:06, 204.50it/s]

finished frames 1158000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 193128/1666666 [36:01<1:57:16, 209.42it/s]

finished frames 1158600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 193237/1666666 [36:02<1:56:26, 210.91it/s]

finished frames 1159200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 193325/1666666 [36:02<1:56:33, 210.68it/s]

finished frames 1159800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 193434/1666666 [36:03<2:04:17, 197.55it/s]

finished frames 1160400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 193543/1666666 [36:03<1:57:44, 208.53it/s]

finished frames 1161000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 193630/1666666 [36:04<1:56:55, 209.97it/s]

finished frames 1161600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 193739/1666666 [36:04<1:56:28, 210.76it/s]

finished frames 1162200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 193826/1666666 [36:05<1:56:24, 210.86it/s]

finished frames 1162800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 193936/1666666 [36:05<1:56:09, 211.30it/s]

finished frames 1163400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 194022/1666666 [36:06<1:59:05, 206.09it/s]

finished frames 1164000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 194130/1666666 [36:06<1:57:11, 209.42it/s]

finished frames 1164600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 194239/1666666 [36:07<1:56:19, 210.98it/s]

finished frames 1165200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 194327/1666666 [36:07<1:56:12, 211.16it/s]

finished frames 1165800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 194437/1666666 [36:08<1:56:12, 211.16it/s]

finished frames 1166400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 194525/1666666 [36:08<1:56:32, 210.53it/s]

finished frames 1167000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 194635/1666666 [36:09<1:56:21, 210.86it/s]

finished frames 1167600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 194723/1666666 [36:09<1:56:27, 210.66it/s]

finished frames 1168200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 194833/1666666 [36:10<1:56:14, 211.03it/s]

finished frames 1168800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 194943/1666666 [36:10<1:56:01, 211.40it/s]

finished frames 1169400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 195028/1666666 [36:10<1:59:21, 205.49it/s]

finished frames 1170000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 195136/1666666 [36:11<1:57:07, 209.40it/s]

finished frames 1170600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 195223/1666666 [36:11<1:56:37, 210.29it/s]

finished frames 1171200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 195333/1666666 [36:12<1:56:28, 210.53it/s]

finished frames 1171800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 195443/1666666 [36:12<1:56:30, 210.45it/s]

finished frames 1172400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 195530/1666666 [36:13<1:56:28, 210.52it/s]

finished frames 1173000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 195639/1666666 [36:13<1:56:17, 210.81it/s]

finished frames 1173600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 195727/1666666 [36:14<1:56:20, 210.73it/s]

finished frames 1174200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 195836/1666666 [36:14<1:59:48, 204.60it/s]

finished frames 1174800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 195922/1666666 [36:15<1:57:30, 208.59it/s]

finished frames 1175400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 196029/1666666 [36:15<1:59:08, 205.73it/s]

finished frames 1176000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 196138/1666666 [36:16<1:56:33, 210.27it/s]

finished frames 1176600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 196225/1666666 [36:16<1:56:20, 210.66it/s]

finished frames 1177200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 196334/1666666 [36:17<1:56:15, 210.79it/s]

finished frames 1177800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 196422/1666666 [36:17<1:56:07, 211.02it/s]

finished frames 1178400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 196532/1666666 [36:18<1:56:15, 210.75it/s]

finished frames 1179000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 196642/1666666 [36:18<1:56:05, 211.03it/s]

finished frames 1179600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 196730/1666666 [36:19<1:56:15, 210.72it/s]

finished frames 1180200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 196839/1666666 [36:19<1:56:04, 211.03it/s]

finished frames 1180800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 196927/1666666 [36:20<1:56:08, 210.92it/s]

finished frames 1181400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 197036/1666666 [36:20<1:58:54, 205.98it/s]

finished frames 1182000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 197123/1666666 [36:20<1:57:07, 209.10it/s]

finished frames 1182600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 197230/1666666 [36:21<1:56:08, 210.87it/s]

finished frames 1183200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 197339/1666666 [36:21<1:56:33, 210.11it/s]

finished frames 1183800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 197427/1666666 [36:22<1:56:19, 210.52it/s]

finished frames 1184400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 197537/1666666 [36:22<1:56:12, 210.70it/s]

finished frames 1185000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 197623/1666666 [36:23<1:56:44, 209.72it/s]

finished frames 1185600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 197731/1666666 [36:23<1:56:26, 210.26it/s]

finished frames 1186200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 197838/1666666 [36:24<1:56:34, 209.99it/s]

finished frames 1186800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 197925/1666666 [36:24<1:56:36, 209.92it/s]

finished frames 1187400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 198031/1666666 [36:25<2:06:26, 193.58it/s]

finished frames 1188000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 198139/1666666 [36:25<1:58:21, 206.79it/s]

finished frames 1188600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 198225/1666666 [36:26<1:56:56, 209.28it/s]

finished frames 1189200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 198333/1666666 [36:26<1:56:29, 210.07it/s]

finished frames 1189800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 198442/1666666 [36:27<1:56:21, 210.30it/s]

finished frames 1190400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 198530/1666666 [36:27<1:56:32, 209.95it/s]

finished frames 1191000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 198639/1666666 [36:28<1:56:15, 210.45it/s]

finished frames 1191600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 198727/1666666 [36:28<1:56:20, 210.28it/s]

finished frames 1192200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 198836/1666666 [36:29<1:56:16, 210.41it/s]

finished frames 1192800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 198923/1666666 [36:29<1:56:31, 209.94it/s]

finished frames 1193400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 199029/1666666 [36:30<1:59:06, 205.36it/s]

finished frames 1194000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 199138/1666666 [36:30<1:56:41, 209.61it/s]

finished frames 1194600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 199224/1666666 [36:31<1:56:36, 209.75it/s]

finished frames 1195200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 199332/1666666 [36:31<1:56:12, 210.44it/s]

finished frames 1195800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 199442/1666666 [36:32<1:56:12, 210.42it/s]

finished frames 1196400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 199530/1666666 [36:32<1:56:08, 210.54it/s]

finished frames 1197000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 199640/1666666 [36:33<1:56:03, 210.67it/s]

finished frames 1197600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 199727/1666666 [36:33<1:56:02, 210.71it/s]

finished frames 1198200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 199835/1666666 [36:33<1:56:17, 210.21it/s]

finished frames 1198800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 199923/1666666 [36:34<1:56:20, 210.11it/s]

finished frames 1199400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 200031/1666666 [36:34<1:58:20, 206.56it/s]

finished frames 1200000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 200139/1666666 [36:35<1:56:26, 209.91it/s]

finished frames 1200600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 200225/1666666 [36:35<1:56:17, 210.17it/s]

finished frames 1201200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 200334/1666666 [36:36<1:56:10, 210.35it/s]

finished frames 1201800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 200443/1666666 [36:36<1:58:28, 206.25it/s]

finished frames 1202400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 200529/1666666 [36:37<2:00:25, 202.90it/s]

finished frames 1203000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 200637/1666666 [36:37<1:56:54, 209.00it/s]

finished frames 1203600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 200723/1666666 [36:38<1:56:20, 210.00it/s]

finished frames 1204200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 200833/1666666 [36:38<1:56:10, 210.30it/s]

finished frames 1204800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 200943/1666666 [36:39<1:55:41, 211.14it/s]

finished frames 1205400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 201029/1666666 [36:39<1:58:44, 205.71it/s]

finished frames 1206000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 201137/1666666 [36:40<1:56:41, 209.30it/s]

finished frames 1206600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 201224/1666666 [36:40<1:55:58, 210.60it/s]

finished frames 1207200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 201334/1666666 [36:41<1:55:56, 210.63it/s]

finished frames 1207800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 201422/1666666 [36:41<1:55:30, 211.42it/s]

finished frames 1208400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 201532/1666666 [36:42<1:56:00, 210.48it/s]

finished frames 1209000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 201641/1666666 [36:42<1:55:40, 211.08it/s]

finished frames 1209600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 201728/1666666 [36:43<1:56:05, 210.31it/s]

finished frames 1210200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 201838/1666666 [36:43<1:55:49, 210.80it/s]

finished frames 1210800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 201926/1666666 [36:43<1:55:48, 210.80it/s]

finished frames 1211400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 202032/1666666 [36:44<1:59:05, 204.98it/s]

finished frames 1212000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 202141/1666666 [36:45<1:56:25, 209.66it/s]

finished frames 1212600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 202228/1666666 [36:45<1:56:08, 210.16it/s]

finished frames 1213200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 202337/1666666 [36:45<1:56:11, 210.05it/s]

finished frames 1213800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 202424/1666666 [36:46<1:55:58, 210.44it/s]

finished frames 1214400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 202534/1666666 [36:46<1:54:18, 213.46it/s]

finished frames 1215000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 202644/1666666 [36:47<1:53:51, 214.30it/s]

finished frames 1215600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 202732/1666666 [36:47<2:00:59, 201.65it/s]

finished frames 1216200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 202841/1666666 [36:48<1:55:42, 210.86it/s]

finished frames 1216800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 202929/1666666 [36:48<1:54:23, 213.26it/s]

finished frames 1217400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 203039/1666666 [36:49<1:56:20, 209.69it/s]

finished frames 1218000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 203126/1666666 [36:49<1:54:31, 213.00it/s]

finished frames 1218600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 203236/1666666 [36:50<1:53:55, 214.09it/s]

finished frames 1219200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 203324/1666666 [36:50<1:54:06, 213.75it/s]

finished frames 1219800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 203434/1666666 [36:51<1:54:11, 213.57it/s]

finished frames 1220400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 203544/1666666 [36:51<1:54:06, 213.71it/s]

finished frames 1221000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 203632/1666666 [36:52<1:54:01, 213.85it/s]

finished frames 1221600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 203742/1666666 [36:52<1:54:00, 213.86it/s]

finished frames 1222200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 203830/1666666 [36:53<1:53:42, 214.42it/s]

finished frames 1222800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 203940/1666666 [36:53<1:53:31, 214.73it/s]

finished frames 1223400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 204027/1666666 [36:53<1:57:26, 207.57it/s]

finished frames 1224000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 204134/1666666 [36:54<1:56:16, 209.63it/s]

finished frames 1224600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 204243/1666666 [36:54<1:56:00, 210.09it/s]

finished frames 1225200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 204330/1666666 [36:55<1:56:19, 209.51it/s]

finished frames 1225800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 204439/1666666 [36:55<1:55:20, 211.29it/s]

finished frames 1226400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 204527/1666666 [36:56<1:55:18, 211.35it/s]

finished frames 1227000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 204636/1666666 [36:56<1:55:57, 210.14it/s]

finished frames 1227600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 204723/1666666 [36:57<1:56:01, 210.01it/s]

finished frames 1228200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 204831/1666666 [36:57<1:55:56, 210.13it/s]

finished frames 1228800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 204940/1666666 [36:58<1:55:41, 210.58it/s]

finished frames 1229400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 205025/1666666 [36:58<1:58:34, 205.44it/s]

finished frames 1230000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 205130/1666666 [36:59<2:02:26, 198.93it/s]

finished frames 1230600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 205238/1666666 [36:59<1:57:05, 208.03it/s]

finished frames 1231200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 205323/1666666 [37:00<1:56:29, 209.08it/s]

finished frames 1231800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 205430/1666666 [37:00<1:56:16, 209.45it/s]

finished frames 1232400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 205536/1666666 [37:01<1:56:13, 209.53it/s]

finished frames 1233000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 205622/1666666 [37:01<1:55:46, 210.32it/s]

finished frames 1233600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 205730/1666666 [37:02<1:56:07, 209.66it/s]

finished frames 1234200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 205838/1666666 [37:02<1:55:55, 210.01it/s]

finished frames 1234800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 205923/1666666 [37:03<1:56:17, 209.35it/s]

finished frames 1235400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 206028/1666666 [37:03<1:58:51, 204.80it/s]

finished frames 1236000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 206136/1666666 [37:04<1:56:29, 208.96it/s]

finished frames 1236600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 206222/1666666 [37:04<1:56:05, 209.66it/s]

finished frames 1237200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 206330/1666666 [37:05<1:55:43, 210.32it/s]

finished frames 1237800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 206440/1666666 [37:05<1:55:21, 210.96it/s]

finished frames 1238400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 206527/1666666 [37:05<1:55:28, 210.75it/s]

finished frames 1239000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 206635/1666666 [37:06<1:55:44, 210.23it/s]

finished frames 1239600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 206723/1666666 [37:06<1:55:53, 209.97it/s]

finished frames 1240200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 206832/1666666 [37:07<1:55:43, 210.25it/s]

finished frames 1240800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 206939/1666666 [37:07<1:55:58, 209.77it/s]

finished frames 1241400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 207023/1666666 [37:08<1:59:12, 204.07it/s]

finished frames 1242000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 207128/1666666 [37:08<1:57:12, 207.54it/s]

finished frames 1242600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 207237/1666666 [37:09<1:56:02, 209.62it/s]

finished frames 1243200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 207322/1666666 [37:09<1:56:18, 209.11it/s]

finished frames 1243800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 207429/1666666 [37:10<1:56:07, 209.43it/s]

finished frames 1244400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 207537/1666666 [37:10<1:55:55, 209.78it/s]

finished frames 1245000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 207643/1666666 [37:11<1:58:31, 205.16it/s]

finished frames 1245600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 207729/1666666 [37:11<1:56:31, 208.66it/s]

finished frames 1246200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 207834/1666666 [37:12<1:56:24, 208.87it/s]

finished frames 1246800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 207939/1666666 [37:12<1:56:24, 208.85it/s]

finished frames 1247400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 208023/1666666 [37:13<1:59:28, 203.49it/s]

finished frames 1248000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 208128/1666666 [37:13<1:57:20, 207.15it/s]

finished frames 1248600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 12%|█▏        | 208233/1666666 [37:14<1:56:47, 208.12it/s]

finished frames 1249200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 208338/1666666 [37:14<1:56:40, 208.32it/s]

finished frames 1249800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 208423/1666666 [37:15<1:56:13, 209.12it/s]

finished frames 1250400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 208529/1666666 [37:15<1:56:12, 209.13it/s]

finished frames 1251000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 208634/1666666 [37:16<1:56:30, 208.56it/s]

finished frames 1251600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 208740/1666666 [37:16<1:56:09, 209.19it/s]

finished frames 1252200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 208824/1666666 [37:17<1:56:40, 208.25it/s]

finished frames 1252800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 208929/1666666 [37:17<1:56:44, 208.11it/s]

finished frames 1253400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 209034/1666666 [37:18<1:59:16, 203.68it/s]

finished frames 1254000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 209140/1666666 [37:18<1:56:23, 208.72it/s]

finished frames 1254600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 209225/1666666 [37:18<1:56:04, 209.26it/s]

finished frames 1255200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 209331/1666666 [37:19<1:55:47, 209.76it/s]

finished frames 1255800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 209436/1666666 [37:19<1:56:11, 209.02it/s]

finished frames 1256400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 209542/1666666 [37:20<1:56:10, 209.04it/s]

finished frames 1257000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 209628/1666666 [37:20<1:55:32, 210.18it/s]

finished frames 1257600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 209734/1666666 [37:21<1:56:00, 209.32it/s]

finished frames 1258200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 209841/1666666 [37:21<1:59:16, 203.57it/s]

finished frames 1258800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 209927/1666666 [37:22<1:56:43, 207.99it/s]

finished frames 1259400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 210032/1666666 [37:22<1:59:00, 204.00it/s]

finished frames 1260000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 210138/1666666 [37:23<1:56:45, 207.91it/s]

finished frames 1260600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 210223/1666666 [37:23<1:55:50, 209.53it/s]

finished frames 1261200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 210331/1666666 [37:24<1:55:35, 209.99it/s]

finished frames 1261800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 210440/1666666 [37:24<1:55:09, 210.74it/s]

finished frames 1262400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 210528/1666666 [37:25<1:55:12, 210.64it/s]

finished frames 1263000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 210635/1666666 [37:25<1:55:43, 209.69it/s]

finished frames 1263600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 210742/1666666 [37:26<1:55:24, 210.25it/s]

finished frames 1264200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 210829/1666666 [37:26<1:55:24, 210.24it/s]

finished frames 1264800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 210939/1666666 [37:27<1:54:58, 211.03it/s]

finished frames 1265400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 211026/1666666 [37:27<1:57:56, 205.71it/s]

finished frames 1266000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 211133/1666666 [37:28<1:55:55, 209.28it/s]

finished frames 1266600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 211241/1666666 [37:28<1:55:28, 210.07it/s]

finished frames 1267200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 211328/1666666 [37:29<1:55:35, 209.82it/s]

finished frames 1267800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 211436/1666666 [37:29<1:55:25, 210.12it/s]

finished frames 1268400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 211524/1666666 [37:29<1:55:08, 210.63it/s]

finished frames 1269000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 211634/1666666 [37:30<1:55:05, 210.70it/s]

finished frames 1269600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 211722/1666666 [37:30<1:55:15, 210.40it/s]

finished frames 1270200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 211832/1666666 [37:31<1:55:16, 210.35it/s]

finished frames 1270800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 211942/1666666 [37:31<1:54:59, 210.84it/s]

finished frames 1271400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 212028/1666666 [37:32<1:58:05, 205.29it/s]

finished frames 1272000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 212134/1666666 [37:32<2:06:38, 191.42it/s]

finished frames 1272600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 212218/1666666 [37:33<1:59:06, 203.52it/s]

finished frames 1273200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 212324/1666666 [37:33<1:58:43, 204.15it/s]

finished frames 1273800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 212431/1666666 [37:34<1:56:09, 208.65it/s]

finished frames 1274400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 212540/1666666 [37:34<1:55:09, 210.46it/s]

finished frames 1275000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 212627/1666666 [37:35<1:55:16, 210.22it/s]

finished frames 1275600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 212737/1666666 [37:35<1:55:08, 210.46it/s]

finished frames 1276200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 212824/1666666 [37:36<1:55:09, 210.40it/s]

finished frames 1276800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 212933/1666666 [37:36<1:55:23, 209.96it/s]

finished frames 1277400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 213039/1666666 [37:37<1:58:18, 204.79it/s]

finished frames 1278000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 213125/1666666 [37:37<1:56:12, 208.48it/s]

finished frames 1278600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 213233/1666666 [37:38<1:55:27, 209.80it/s]

finished frames 1279200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 213341/1666666 [37:38<1:55:17, 210.09it/s]

finished frames 1279800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 213427/1666666 [37:39<1:55:27, 209.78it/s]

finished frames 1280400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 213535/1666666 [37:39<1:55:19, 210.01it/s]

finished frames 1281000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 213643/1666666 [37:40<1:55:18, 210.02it/s]

finished frames 1281600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 213728/1666666 [37:40<1:55:50, 209.04it/s]

finished frames 1282200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 213835/1666666 [37:41<1:55:14, 210.10it/s]

finished frames 1282800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 213923/1666666 [37:41<1:55:09, 210.24it/s]

finished frames 1283400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 214030/1666666 [37:42<1:57:58, 205.22it/s]

finished frames 1284000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 214138/1666666 [37:42<1:55:37, 209.39it/s]

finished frames 1284600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 214225/1666666 [37:42<1:55:17, 209.97it/s]

finished frames 1285200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 214335/1666666 [37:43<1:55:07, 210.24it/s]

finished frames 1285800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 214423/1666666 [37:43<1:55:12, 210.08it/s]

finished frames 1286400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 214532/1666666 [37:44<1:58:10, 204.81it/s]

finished frames 1287000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 214640/1666666 [37:45<2:00:32, 200.77it/s]

finished frames 1287600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 214727/1666666 [37:45<1:56:03, 208.49it/s]

finished frames 1288200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 214835/1666666 [37:45<1:55:09, 210.12it/s]

finished frames 1288800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 214923/1666666 [37:46<1:55:10, 210.09it/s]

finished frames 1289400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 215029/1666666 [37:46<1:58:21, 204.40it/s]

finished frames 1290000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 215134/1666666 [37:47<1:58:28, 204.21it/s]

finished frames 1290600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 215243/1666666 [37:47<1:55:13, 209.93it/s]

finished frames 1291200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 215330/1666666 [37:48<1:55:17, 209.81it/s]

finished frames 1291800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 215438/1666666 [37:48<1:54:52, 210.56it/s]

finished frames 1292400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 215526/1666666 [37:49<1:55:06, 210.12it/s]

finished frames 1293000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 215635/1666666 [37:49<1:54:44, 210.78it/s]

finished frames 1293600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 215722/1666666 [37:50<1:54:57, 210.37it/s]

finished frames 1294200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 215832/1666666 [37:50<1:54:51, 210.52it/s]

finished frames 1294800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 215942/1666666 [37:51<1:54:40, 210.84it/s]

finished frames 1295400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 216028/1666666 [37:51<1:57:32, 205.70it/s]

finished frames 1296000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 216136/1666666 [37:52<1:55:12, 209.84it/s]

finished frames 1296600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 216223/1666666 [37:52<1:54:52, 210.43it/s]

finished frames 1297200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 216332/1666666 [37:53<1:54:43, 210.69it/s]

finished frames 1297800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 216441/1666666 [37:53<1:56:48, 206.94it/s]

finished frames 1298400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 216529/1666666 [37:54<1:54:27, 211.15it/s]

finished frames 1299000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 216639/1666666 [37:54<1:54:12, 211.59it/s]

finished frames 1299600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 216726/1666666 [37:54<1:57:39, 205.38it/s]

finished frames 1300200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 216832/1666666 [37:55<1:56:22, 207.62it/s]

finished frames 1300800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 216917/1666666 [37:55<1:55:36, 209.01it/s]

finished frames 1301400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 217022/1666666 [37:56<2:00:39, 200.24it/s]

finished frames 1302000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 217127/1666666 [37:56<2:00:28, 200.54it/s]

finished frames 1302600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 217234/1666666 [37:57<1:55:55, 208.39it/s]

finished frames 1303200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 217342/1666666 [37:58<1:55:06, 209.85it/s]

finished frames 1303800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 217428/1666666 [37:58<1:55:00, 210.03it/s]

finished frames 1304400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 217537/1666666 [37:58<1:54:47, 210.41it/s]

finished frames 1305000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 217624/1666666 [37:59<1:55:01, 209.98it/s]

finished frames 1305600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 217731/1666666 [37:59<1:54:59, 210.02it/s]

finished frames 1306200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 217841/1666666 [38:00<1:54:41, 210.53it/s]

finished frames 1306800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 217928/1666666 [38:00<1:54:57, 210.03it/s]

finished frames 1307400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 218035/1666666 [38:01<1:57:51, 204.86it/s]

finished frames 1308000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 218122/1666666 [38:01<1:55:43, 208.61it/s]

finished frames 1308600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 218230/1666666 [38:02<1:55:16, 209.42it/s]

finished frames 1309200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 218336/1666666 [38:02<1:55:14, 209.45it/s]

finished frames 1309800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 218443/1666666 [38:03<1:55:05, 209.73it/s]

finished frames 1310400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 218528/1666666 [38:03<1:55:11, 209.54it/s]

finished frames 1311000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 218635/1666666 [38:04<1:54:58, 209.91it/s]

finished frames 1311600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 218742/1666666 [38:04<1:55:03, 209.73it/s]

finished frames 1312200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 218829/1666666 [38:05<1:54:40, 210.42it/s]

finished frames 1312800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 218938/1666666 [38:05<1:54:43, 210.32it/s]

finished frames 1313400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 219024/1666666 [38:06<1:57:35, 205.19it/s]

finished frames 1314000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 219131/1666666 [38:06<1:55:24, 209.04it/s]

finished frames 1314600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 219240/1666666 [38:07<1:59:43, 201.50it/s]

finished frames 1315200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 219326/1666666 [38:07<1:59:37, 201.65it/s]

finished frames 1315800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 219433/1666666 [38:08<1:55:51, 208.20it/s]

finished frames 1316400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 219540/1666666 [38:08<1:55:13, 209.31it/s]

finished frames 1317000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 219626/1666666 [38:09<1:55:04, 209.59it/s]

finished frames 1317600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 219733/1666666 [38:09<1:54:56, 209.82it/s]

finished frames 1318200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 219840/1666666 [38:10<1:55:02, 209.60it/s]

finished frames 1318800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 219926/1666666 [38:10<1:54:45, 210.12it/s]

finished frames 1319400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 220033/1666666 [38:10<1:57:28, 205.24it/s]

finished frames 1320000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 220142/1666666 [38:11<1:54:53, 209.85it/s]

finished frames 1320600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 220228/1666666 [38:11<1:54:38, 210.30it/s]

finished frames 1321200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 220337/1666666 [38:12<1:54:17, 210.90it/s]

finished frames 1321800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 220424/1666666 [38:12<1:54:43, 210.09it/s]

finished frames 1322400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 220532/1666666 [38:13<1:54:50, 209.88it/s]

finished frames 1323000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 220641/1666666 [38:13<1:54:29, 210.50it/s]

finished frames 1323600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 220729/1666666 [38:14<1:54:28, 210.51it/s]

finished frames 1324200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 220839/1666666 [38:14<1:54:27, 210.54it/s]

finished frames 1324800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 220927/1666666 [38:15<1:54:26, 210.54it/s]

finished frames 1325400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 221036/1666666 [38:15<1:56:58, 205.98it/s]

finished frames 1326000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 221122/1666666 [38:16<1:55:24, 208.77it/s]

finished frames 1326600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 221230/1666666 [38:16<1:54:56, 209.60it/s]

finished frames 1327200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 221339/1666666 [38:17<1:54:36, 210.17it/s]

finished frames 1327800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 221426/1666666 [38:17<1:54:23, 210.57it/s]

finished frames 1328400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 221534/1666666 [38:18<1:54:51, 209.69it/s]

finished frames 1329000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 221643/1666666 [38:18<1:54:30, 210.34it/s]

finished frames 1329600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 221730/1666666 [38:19<1:54:36, 210.13it/s]

finished frames 1330200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 221838/1666666 [38:19<1:54:42, 209.92it/s]

finished frames 1330800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 221924/1666666 [38:20<1:54:47, 209.75it/s]

finished frames 1331400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 222029/1666666 [38:20<1:57:32, 204.85it/s]

finished frames 1332000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 222137/1666666 [38:21<1:54:56, 209.47it/s]

finished frames 1332600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 222223/1666666 [38:21<1:54:43, 209.84it/s]

finished frames 1333200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 222329/1666666 [38:21<1:54:56, 209.43it/s]

finished frames 1333800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 222436/1666666 [38:22<1:54:42, 209.85it/s]

finished frames 1334400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 222522/1666666 [38:22<1:54:22, 210.45it/s]

finished frames 1335000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 222632/1666666 [38:23<1:54:31, 210.14it/s]

finished frames 1335600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 222741/1666666 [38:23<1:53:35, 211.85it/s]

finished frames 1336200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 222829/1666666 [38:24<1:54:02, 211.02it/s]

finished frames 1336800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 222937/1666666 [38:24<1:54:40, 209.82it/s]

finished frames 1337400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 223022/1666666 [38:25<1:57:15, 205.19it/s]

finished frames 1338000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 223129/1666666 [38:25<1:54:44, 209.69it/s]

finished frames 1338600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 223237/1666666 [38:26<1:54:23, 210.29it/s]

finished frames 1339200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 223324/1666666 [38:26<1:54:17, 210.49it/s]

finished frames 1339800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 223431/1666666 [38:27<1:54:50, 209.46it/s]

finished frames 1340400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 223539/1666666 [38:27<1:54:00, 210.98it/s]

finished frames 1341000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 223627/1666666 [38:28<1:53:46, 211.38it/s]

finished frames 1341600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 223737/1666666 [38:28<1:54:20, 210.31it/s]

finished frames 1342200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 223824/1666666 [38:29<1:54:39, 209.74it/s]

finished frames 1342800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 223933/1666666 [38:29<1:57:40, 204.35it/s]

finished frames 1343400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 224039/1666666 [38:30<1:57:06, 205.30it/s]

finished frames 1344000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 224124/1666666 [38:30<1:55:20, 208.45it/s]

finished frames 1344600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 224232/1666666 [38:31<1:54:33, 209.86it/s]

finished frames 1345200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 224340/1666666 [38:31<1:54:21, 210.19it/s]

finished frames 1345800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 224428/1666666 [38:32<1:54:06, 210.65it/s]

finished frames 1346400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 224537/1666666 [38:32<1:54:17, 210.29it/s]

finished frames 1347000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 224625/1666666 [38:32<1:54:17, 210.28it/s]

finished frames 1347600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 224735/1666666 [38:33<1:54:18, 210.24it/s]

finished frames 1348200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 224822/1666666 [38:33<1:54:25, 210.02it/s]

finished frames 1348800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 13%|█▎        | 224930/1666666 [38:34<1:54:26, 209.95it/s]

finished frames 1349400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▎        | 225036/1666666 [38:34<1:56:42, 205.87it/s]

finished frames 1350000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▎        | 225142/1666666 [38:35<1:55:01, 208.87it/s]

finished frames 1350600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▎        | 225228/1666666 [38:35<1:54:32, 209.74it/s]

finished frames 1351200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▎        | 225336/1666666 [38:36<1:53:59, 210.73it/s]

finished frames 1351800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▎        | 225424/1666666 [38:36<1:54:17, 210.17it/s]

finished frames 1352400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▎        | 225532/1666666 [38:37<1:54:14, 210.25it/s]

finished frames 1353000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▎        | 225642/1666666 [38:37<1:53:20, 211.90it/s]

finished frames 1353600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▎        | 225729/1666666 [38:38<1:54:04, 210.54it/s]

finished frames 1354200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▎        | 225839/1666666 [38:38<1:53:49, 210.96it/s]

finished frames 1354800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▎        | 225925/1666666 [38:39<1:53:57, 210.70it/s]

finished frames 1355400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▎        | 226032/1666666 [38:39<1:57:07, 204.99it/s]

finished frames 1356000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▎        | 226140/1666666 [38:40<1:54:36, 209.48it/s]

finished frames 1356600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▎        | 226227/1666666 [38:40<1:54:10, 210.26it/s]

finished frames 1357200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▎        | 226337/1666666 [38:41<2:03:25, 194.49it/s]

finished frames 1357800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▎        | 226424/1666666 [38:41<1:56:28, 206.09it/s]

finished frames 1358400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▎        | 226530/1666666 [38:42<1:54:45, 209.16it/s]

finished frames 1359000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▎        | 226638/1666666 [38:42<1:54:03, 210.43it/s]

finished frames 1359600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▎        | 226726/1666666 [38:43<1:53:43, 211.02it/s]

finished frames 1360200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▎        | 226836/1666666 [38:43<1:53:33, 211.31it/s]

finished frames 1360800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▎        | 226924/1666666 [38:43<1:53:53, 210.68it/s]

finished frames 1361400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▎        | 227032/1666666 [38:44<1:56:52, 205.29it/s]

finished frames 1362000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▎        | 227141/1666666 [38:45<1:54:02, 210.39it/s]

finished frames 1362600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▎        | 227229/1666666 [38:45<1:53:52, 210.68it/s]

finished frames 1363200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▎        | 227339/1666666 [38:45<1:53:43, 210.93it/s]

finished frames 1363800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▎        | 227427/1666666 [38:46<1:53:55, 210.54it/s]

finished frames 1364400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▎        | 227537/1666666 [38:46<1:53:31, 211.29it/s]

finished frames 1365000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▎        | 227623/1666666 [38:47<1:54:27, 209.54it/s]

finished frames 1365600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▎        | 227730/1666666 [38:47<1:54:22, 209.67it/s]

finished frames 1366200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▎        | 227837/1666666 [38:48<1:54:11, 209.99it/s]

finished frames 1366800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▎        | 227924/1666666 [38:48<1:54:17, 209.80it/s]

finished frames 1367400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▎        | 228029/1666666 [38:49<1:57:04, 204.79it/s]

finished frames 1368000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▎        | 228137/1666666 [38:49<1:54:17, 209.76it/s]

finished frames 1368600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▎        | 228242/1666666 [38:50<1:54:49, 208.80it/s]

finished frames 1369200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▎        | 228326/1666666 [38:50<1:55:09, 208.18it/s]

finished frames 1369800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▎        | 228431/1666666 [38:51<1:55:00, 208.41it/s]

finished frames 1370400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▎        | 228536/1666666 [38:51<1:54:48, 208.76it/s]

finished frames 1371000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▎        | 228641/1666666 [38:52<1:57:23, 204.15it/s]

finished frames 1371600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▎        | 228725/1666666 [38:52<2:00:29, 198.89it/s]

finished frames 1372200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▎        | 228830/1666666 [38:53<1:56:01, 206.53it/s]

finished frames 1372800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▎        | 228935/1666666 [38:53<1:55:09, 208.08it/s]

finished frames 1373400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▎        | 229040/1666666 [38:54<1:57:22, 204.13it/s]

finished frames 1374000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▎        | 229124/1666666 [38:54<1:55:39, 207.15it/s]

finished frames 1374600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 229229/1666666 [38:55<1:54:55, 208.45it/s]

finished frames 1375200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 229334/1666666 [38:55<1:54:50, 208.58it/s]

finished frames 1375800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 229440/1666666 [38:56<1:54:25, 209.35it/s]

finished frames 1376400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 229524/1666666 [38:56<1:54:41, 208.83it/s]

finished frames 1377000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 229629/1666666 [38:57<1:54:40, 208.84it/s]

finished frames 1377600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 229734/1666666 [38:57<1:54:37, 208.95it/s]

finished frames 1378200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 229840/1666666 [38:58<1:54:31, 209.11it/s]

finished frames 1378800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 229925/1666666 [38:58<1:54:23, 209.33it/s]

finished frames 1379400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 230030/1666666 [38:58<1:56:46, 205.05it/s]

finished frames 1380000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 230136/1666666 [38:59<1:54:48, 208.56it/s]

finished frames 1380600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 230242/1666666 [38:59<1:54:23, 209.27it/s]

finished frames 1381200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 230326/1666666 [39:00<1:54:28, 209.11it/s]

finished frames 1381800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 230433/1666666 [39:00<1:54:17, 209.43it/s]

finished frames 1382400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 230539/1666666 [39:01<1:54:13, 209.56it/s]

finished frames 1383000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 230623/1666666 [39:01<1:54:44, 208.58it/s]

finished frames 1383600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 230728/1666666 [39:02<1:55:31, 207.16it/s]

finished frames 1384200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 230834/1666666 [39:02<1:54:49, 208.42it/s]

finished frames 1384800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 230940/1666666 [39:03<1:59:23, 200.44it/s]

finished frames 1385400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 231024/1666666 [39:03<2:08:57, 185.55it/s]

finished frames 1386000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 231132/1666666 [39:04<1:56:19, 205.68it/s]

finished frames 1386600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 231240/1666666 [39:04<1:53:47, 210.25it/s]

finished frames 1387200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 231327/1666666 [39:05<1:53:38, 210.52it/s]

finished frames 1387800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 231437/1666666 [39:05<1:53:24, 210.92it/s]

finished frames 1388400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 231525/1666666 [39:06<1:53:32, 210.65it/s]

finished frames 1389000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 231634/1666666 [39:06<1:54:02, 209.72it/s]

finished frames 1389600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 231742/1666666 [39:07<1:53:55, 209.92it/s]

finished frames 1390200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 231829/1666666 [39:07<1:53:40, 210.37it/s]

finished frames 1390800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 231938/1666666 [39:08<1:53:28, 210.73it/s]

finished frames 1391400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 232023/1666666 [39:08<1:57:11, 204.04it/s]

finished frames 1392000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 232130/1666666 [39:09<1:54:27, 208.89it/s]

finished frames 1392600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 232238/1666666 [39:09<1:53:40, 210.30it/s]

finished frames 1393200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 232325/1666666 [39:10<1:53:55, 209.83it/s]

finished frames 1393800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 232431/1666666 [39:10<1:54:00, 209.66it/s]

finished frames 1394400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 232539/1666666 [39:11<1:53:25, 210.73it/s]

finished frames 1395000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 232627/1666666 [39:11<1:53:36, 210.37it/s]

finished frames 1395600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 232737/1666666 [39:11<1:53:17, 210.96it/s]

finished frames 1396200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 232824/1666666 [39:12<1:53:36, 210.35it/s]

finished frames 1396800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 232934/1666666 [39:12<1:53:41, 210.17it/s]

finished frames 1397400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 233021/1666666 [39:13<1:57:01, 204.17it/s]

finished frames 1398000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 233128/1666666 [39:13<1:54:16, 209.09it/s]

finished frames 1398600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 233236/1666666 [39:14<1:53:54, 209.72it/s]

finished frames 1399200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 233323/1666666 [39:14<1:53:30, 210.45it/s]

finished frames 1399800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 233432/1666666 [39:15<1:57:04, 204.04it/s]

finished frames 1400400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 233539/1666666 [39:15<1:54:20, 208.90it/s]

finished frames 1401000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 233623/1666666 [39:16<1:54:24, 208.76it/s]

finished frames 1401600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 233729/1666666 [39:16<1:54:05, 209.33it/s]

finished frames 1402200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 233836/1666666 [39:17<1:53:45, 209.94it/s]

finished frames 1402800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 233923/1666666 [39:17<1:53:13, 210.90it/s]

finished frames 1403400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 234030/1666666 [39:18<1:56:06, 205.65it/s]

finished frames 1404000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 234138/1666666 [39:18<1:53:44, 209.89it/s]

finished frames 1404600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 234223/1666666 [39:19<1:53:58, 209.47it/s]

finished frames 1405200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 234329/1666666 [39:19<1:53:58, 209.46it/s]

finished frames 1405800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 234437/1666666 [39:20<1:53:45, 209.84it/s]

finished frames 1406400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 234542/1666666 [39:20<1:53:59, 209.38it/s]

finished frames 1407000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 234628/1666666 [39:21<1:53:42, 209.91it/s]

finished frames 1407600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 234734/1666666 [39:21<1:53:52, 209.57it/s]

finished frames 1408200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 234842/1666666 [39:22<1:53:30, 210.25it/s]

finished frames 1408800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 234929/1666666 [39:22<1:53:36, 210.05it/s]

finished frames 1409400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 235037/1666666 [39:23<1:55:49, 206.01it/s]

finished frames 1410000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 235122/1666666 [39:23<1:54:35, 208.22it/s]

finished frames 1410600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 235230/1666666 [39:23<1:53:43, 209.77it/s]

finished frames 1411200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 235338/1666666 [39:24<1:53:14, 210.66it/s]

finished frames 1411800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 235424/1666666 [39:24<1:53:36, 209.98it/s]

finished frames 1412400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 235531/1666666 [39:25<1:54:21, 208.56it/s]

finished frames 1413000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 235636/1666666 [39:25<1:59:16, 199.95it/s]

finished frames 1413600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 235721/1666666 [39:26<2:05:39, 189.79it/s]

finished frames 1414200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 235826/1666666 [39:26<1:56:24, 204.85it/s]

finished frames 1414800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 235932/1666666 [39:27<1:54:36, 208.05it/s]

finished frames 1415400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 236038/1666666 [39:27<1:56:49, 204.09it/s]

finished frames 1416000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 236143/1666666 [39:28<1:54:47, 207.70it/s]

finished frames 1416600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 236228/1666666 [39:28<1:54:18, 208.56it/s]

finished frames 1417200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 236334/1666666 [39:29<1:54:04, 208.97it/s]

finished frames 1417800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 236441/1666666 [39:29<1:53:43, 209.61it/s]

finished frames 1418400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 236525/1666666 [39:30<1:54:14, 208.63it/s]

finished frames 1419000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 236632/1666666 [39:30<1:53:48, 209.43it/s]

finished frames 1419600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 236738/1666666 [39:31<1:53:34, 209.84it/s]

finished frames 1420200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 236824/1666666 [39:31<1:53:48, 209.40it/s]

finished frames 1420800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 236929/1666666 [39:32<1:54:19, 208.42it/s]

finished frames 1421400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 237034/1666666 [39:32<1:56:47, 204.03it/s]

finished frames 1422000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 237139/1666666 [39:33<1:54:41, 207.73it/s]

finished frames 1422600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 237224/1666666 [39:33<1:54:11, 208.64it/s]

finished frames 1423200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 237329/1666666 [39:34<1:54:32, 207.98it/s]

finished frames 1423800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 237435/1666666 [39:34<1:54:11, 208.59it/s]

finished frames 1424400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 237540/1666666 [39:35<1:54:18, 208.37it/s]

finished frames 1425000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 237624/1666666 [39:35<1:54:29, 208.02it/s]

finished frames 1425600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 237729/1666666 [39:36<1:54:31, 207.94it/s]

finished frames 1426200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 237834/1666666 [39:36<1:54:06, 208.70it/s]

finished frames 1426800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 237939/1666666 [39:37<1:54:08, 208.62it/s]

finished frames 1427400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 238023/1666666 [39:37<1:56:13, 204.87it/s]

finished frames 1428000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 238130/1666666 [39:37<1:56:10, 204.94it/s]

finished frames 1428600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 238237/1666666 [39:38<1:53:50, 209.12it/s]

finished frames 1429200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 238323/1666666 [39:38<1:53:35, 209.58it/s]

finished frames 1429800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 238428/1666666 [39:39<1:54:00, 208.78it/s]

finished frames 1430400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 238534/1666666 [39:39<1:53:39, 209.42it/s]

finished frames 1431000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 238639/1666666 [39:40<1:54:10, 208.46it/s]

finished frames 1431600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 238724/1666666 [39:40<1:53:36, 209.49it/s]

finished frames 1432200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 238834/1666666 [39:41<1:51:37, 213.18it/s]

finished frames 1432800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 238944/1666666 [39:41<1:50:58, 214.43it/s]

finished frames 1433400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 239031/1666666 [39:42<1:56:50, 203.65it/s]

finished frames 1434000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 239140/1666666 [39:42<1:53:15, 210.08it/s]

finished frames 1434600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 239228/1666666 [39:43<1:51:30, 213.35it/s]

finished frames 1435200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 239338/1666666 [39:43<1:50:58, 214.38it/s]

finished frames 1435800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 239426/1666666 [39:44<1:50:57, 214.40it/s]

finished frames 1436400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 239536/1666666 [39:44<1:50:55, 214.42it/s]

finished frames 1437000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 239624/1666666 [39:45<1:50:59, 214.28it/s]

finished frames 1437600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 239734/1666666 [39:45<1:50:33, 215.11it/s]

finished frames 1438200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 239844/1666666 [39:46<1:50:44, 214.75it/s]

finished frames 1438800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 239932/1666666 [39:46<1:50:22, 215.42it/s]

finished frames 1439400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 240041/1666666 [39:47<1:54:45, 207.18it/s]

finished frames 1440000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 240127/1666666 [39:47<1:53:14, 209.96it/s]

finished frames 1440600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 240237/1666666 [39:47<1:50:21, 215.43it/s]

finished frames 1441200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 240325/1666666 [39:48<1:56:49, 203.49it/s]

finished frames 1441800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 240435/1666666 [39:48<1:50:55, 214.31it/s]

finished frames 1442400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 240523/1666666 [39:49<1:49:57, 216.15it/s]

finished frames 1443000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 240633/1666666 [39:49<1:49:36, 216.85it/s]

finished frames 1443600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 240743/1666666 [39:50<1:49:32, 216.96it/s]

finished frames 1444200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 240831/1666666 [39:50<1:49:39, 216.70it/s]

finished frames 1444800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 240941/1666666 [39:51<1:49:22, 217.25it/s]

finished frames 1445400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 241029/1666666 [39:51<1:52:06, 211.95it/s]

finished frames 1446000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 241139/1666666 [39:52<1:50:14, 215.52it/s]

finished frames 1446600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 241227/1666666 [39:52<1:50:08, 215.71it/s]

finished frames 1447200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 241337/1666666 [39:53<1:49:46, 216.41it/s]

finished frames 1447800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 241425/1666666 [39:53<1:49:59, 215.97it/s]

finished frames 1448400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 241532/1666666 [39:54<1:56:56, 203.12it/s]

finished frames 1449000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 14%|█▍        | 241639/1666666 [39:54<1:53:06, 209.98it/s]

finished frames 1449600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 241728/1666666 [39:54<1:52:47, 210.55it/s]

finished frames 1450200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 241836/1666666 [39:55<1:54:18, 207.76it/s]

finished frames 1450800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 241941/1666666 [39:56<1:55:40, 205.29it/s]

finished frames 1451400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 242025/1666666 [39:56<1:58:23, 200.55it/s]

finished frames 1452000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 242132/1666666 [39:56<1:53:52, 208.51it/s]

finished frames 1452600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 242239/1666666 [39:57<1:54:29, 207.35it/s]

finished frames 1453200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 242326/1666666 [39:57<1:53:18, 209.52it/s]

finished frames 1453800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 242436/1666666 [39:58<1:51:00, 213.82it/s]

finished frames 1454400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 242524/1666666 [39:58<1:50:57, 213.92it/s]

finished frames 1455000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 242634/1666666 [39:59<1:50:51, 214.09it/s]

finished frames 1455600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 242744/1666666 [39:59<1:54:03, 208.07it/s]

finished frames 1456200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 242830/1666666 [40:00<1:56:42, 203.33it/s]

finished frames 1456800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 242939/1666666 [40:00<1:51:54, 212.03it/s]

finished frames 1457400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 243027/1666666 [40:01<1:53:28, 209.11it/s]

finished frames 1458000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 243136/1666666 [40:01<1:51:08, 213.46it/s]

finished frames 1458600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 243224/1666666 [40:02<1:50:41, 214.33it/s]

finished frames 1459200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 243334/1666666 [40:02<1:51:12, 213.31it/s]

finished frames 1459800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 243422/1666666 [40:03<1:50:56, 213.82it/s]

finished frames 1460400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 243532/1666666 [40:03<1:51:00, 213.67it/s]

finished frames 1461000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 243642/1666666 [40:04<1:50:42, 214.24it/s]

finished frames 1461600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 243730/1666666 [40:04<1:50:38, 214.34it/s]

finished frames 1462200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 243840/1666666 [40:05<1:50:31, 214.57it/s]

finished frames 1462800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 243928/1666666 [40:05<1:50:15, 215.07it/s]

finished frames 1463400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 244038/1666666 [40:06<1:52:46, 210.25it/s]

finished frames 1464000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 244126/1666666 [40:06<1:51:12, 213.18it/s]

finished frames 1464600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 244236/1666666 [40:06<1:50:55, 213.72it/s]

finished frames 1465200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 244324/1666666 [40:07<1:50:58, 213.63it/s]

finished frames 1465800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 244434/1666666 [40:07<1:50:59, 213.57it/s]

finished frames 1466400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 244522/1666666 [40:08<1:52:08, 211.35it/s]

finished frames 1467000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 244632/1666666 [40:08<1:50:54, 213.69it/s]

finished frames 1467600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 244742/1666666 [40:09<1:50:48, 213.88it/s]

finished frames 1468200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 244830/1666666 [40:09<1:50:42, 214.05it/s]

finished frames 1468800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 244940/1666666 [40:10<1:50:46, 213.92it/s]

finished frames 1469400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 245028/1666666 [40:10<1:53:28, 208.79it/s]

finished frames 1470000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 245136/1666666 [40:11<1:58:11, 200.44it/s]

finished frames 1470600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 245244/1666666 [40:11<1:55:21, 205.36it/s]

finished frames 1471200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 245331/1666666 [40:12<1:51:53, 211.70it/s]

finished frames 1471800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 245441/1666666 [40:12<1:50:52, 213.63it/s]

finished frames 1472400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 245529/1666666 [40:13<1:50:55, 213.52it/s]

finished frames 1473000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 245639/1666666 [40:13<1:50:35, 214.17it/s]

finished frames 1473600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 245727/1666666 [40:14<1:50:56, 213.45it/s]

finished frames 1474200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 245837/1666666 [40:14<1:50:29, 214.32it/s]

finished frames 1474800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 245925/1666666 [40:14<1:51:02, 213.25it/s]

finished frames 1475400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 246035/1666666 [40:15<1:53:11, 209.19it/s]

finished frames 1476000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 246144/1666666 [40:15<1:51:02, 213.22it/s]

finished frames 1476600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 246232/1666666 [40:16<1:50:44, 213.76it/s]

finished frames 1477200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 246342/1666666 [40:16<1:50:35, 214.05it/s]

finished frames 1477800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 246430/1666666 [40:17<1:50:21, 214.50it/s]

finished frames 1478400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 246540/1666666 [40:17<1:50:30, 214.17it/s]

finished frames 1479000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 246628/1666666 [40:18<1:50:27, 214.26it/s]

finished frames 1479600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 246738/1666666 [40:18<1:50:03, 215.03it/s]

finished frames 1480200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 246826/1666666 [40:19<1:50:03, 215.02it/s]

finished frames 1480800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 246936/1666666 [40:19<1:50:00, 215.10it/s]

finished frames 1481400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 247024/1666666 [40:20<1:52:30, 210.30it/s]

finished frames 1482000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 247134/1666666 [40:20<1:50:10, 214.75it/s]

finished frames 1482600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 247244/1666666 [40:21<1:50:14, 214.60it/s]

finished frames 1483200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 247332/1666666 [40:21<1:50:13, 214.61it/s]

finished frames 1483800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 247442/1666666 [40:22<1:50:11, 214.65it/s]

finished frames 1484400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 247530/1666666 [40:22<1:50:26, 214.16it/s]

finished frames 1485000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 247640/1666666 [40:23<1:53:07, 209.05it/s]

finished frames 1485600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 247728/1666666 [40:23<1:50:37, 213.76it/s]

finished frames 1486200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 247838/1666666 [40:23<1:49:41, 215.57it/s]

finished frames 1486800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 247926/1666666 [40:24<1:50:05, 214.79it/s]

finished frames 1487400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 248036/1666666 [40:24<1:52:10, 210.77it/s]

finished frames 1488000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 248124/1666666 [40:25<1:50:41, 213.60it/s]

finished frames 1488600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 248234/1666666 [40:25<1:50:00, 214.90it/s]

finished frames 1489200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 248344/1666666 [40:26<1:50:16, 214.36it/s]

finished frames 1489800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 248432/1666666 [40:26<1:50:29, 213.93it/s]

finished frames 1490400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 248542/1666666 [40:27<1:50:05, 214.67it/s]

finished frames 1491000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 248630/1666666 [40:27<1:50:06, 214.63it/s]

finished frames 1491600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 248740/1666666 [40:28<1:49:46, 215.26it/s]

finished frames 1492200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 248828/1666666 [40:28<1:50:08, 214.55it/s]

finished frames 1492800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 248938/1666666 [40:29<1:49:45, 215.28it/s]

finished frames 1493400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 249025/1666666 [40:29<1:55:19, 204.87it/s]

finished frames 1494000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 249132/1666666 [40:30<1:53:11, 208.71it/s]

finished frames 1494600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 249239/1666666 [40:30<1:52:45, 209.51it/s]

finished frames 1495200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 249325/1666666 [40:30<1:52:33, 209.86it/s]

finished frames 1495800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 249435/1666666 [40:31<1:52:17, 210.35it/s]

finished frames 1496400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 249522/1666666 [40:31<1:52:20, 210.25it/s]

finished frames 1497000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 249631/1666666 [40:32<1:52:26, 210.03it/s]

finished frames 1497600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 249740/1666666 [40:32<1:52:10, 210.52it/s]

finished frames 1498200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 249826/1666666 [40:33<1:59:34, 197.48it/s]

finished frames 1498800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▍        | 249934/1666666 [40:33<1:53:35, 207.86it/s]

finished frames 1499400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 250040/1666666 [40:34<1:57:11, 201.48it/s]

finished frames 1500000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 250125/1666666 [40:34<1:53:59, 207.12it/s]

finished frames 1500600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 250232/1666666 [40:35<1:52:50, 209.21it/s]

finished frames 1501200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 250339/1666666 [40:35<1:52:29, 209.83it/s]

finished frames 1501800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 250424/1666666 [40:36<1:52:36, 209.62it/s]

finished frames 1502400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 250532/1666666 [40:36<1:52:43, 209.37it/s]

finished frames 1503000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 250640/1666666 [40:37<1:52:21, 210.06it/s]

finished frames 1503600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 250726/1666666 [40:37<1:52:38, 209.51it/s]

finished frames 1504200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 250832/1666666 [40:38<1:52:52, 209.04it/s]

finished frames 1504800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 250940/1666666 [40:38<1:52:30, 209.73it/s]

finished frames 1505400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 251024/1666666 [40:39<1:55:38, 204.04it/s]

finished frames 1506000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 251131/1666666 [40:39<1:53:05, 208.60it/s]

finished frames 1506600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 251239/1666666 [40:40<1:52:21, 209.97it/s]

finished frames 1507200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 251324/1666666 [40:40<1:52:33, 209.57it/s]

finished frames 1507800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 251432/1666666 [40:41<1:52:22, 209.89it/s]

finished frames 1508400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 251540/1666666 [40:41<1:52:04, 210.43it/s]

finished frames 1509000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 251625/1666666 [40:42<1:52:58, 208.74it/s]

finished frames 1509600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 251731/1666666 [40:42<1:52:47, 209.08it/s]

finished frames 1510200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 251837/1666666 [40:43<1:53:03, 208.57it/s]

finished frames 1510800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 251923/1666666 [40:43<1:52:45, 209.12it/s]

finished frames 1511400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 252028/1666666 [40:43<1:55:31, 204.10it/s]

finished frames 1512000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 252133/1666666 [40:44<1:53:28, 207.76it/s]

finished frames 1512600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 252241/1666666 [40:45<1:54:06, 206.58it/s]

finished frames 1513200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 252326/1666666 [40:45<1:59:56, 196.53it/s]

finished frames 1513800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 252433/1666666 [40:45<1:53:47, 207.14it/s]

finished frames 1514400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 252539/1666666 [40:46<1:53:06, 208.37it/s]

finished frames 1515000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 252623/1666666 [40:46<1:53:14, 208.10it/s]

finished frames 1515600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 252729/1666666 [40:47<1:52:38, 209.20it/s]

finished frames 1516200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 252839/1666666 [40:47<1:50:10, 213.88it/s]

finished frames 1516800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 252927/1666666 [40:48<1:49:51, 214.49it/s]

finished frames 1517400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 253037/1666666 [40:48<1:52:15, 209.88it/s]

finished frames 1518000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 253125/1666666 [40:49<1:50:21, 213.48it/s]

finished frames 1518600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 253235/1666666 [40:49<1:50:06, 213.93it/s]

finished frames 1519200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 253323/1666666 [40:50<1:50:04, 213.99it/s]

finished frames 1519800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 253433/1666666 [40:50<1:49:48, 214.51it/s]

finished frames 1520400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 253543/1666666 [40:51<1:49:41, 214.71it/s]

finished frames 1521000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 253631/1666666 [40:51<1:49:39, 214.76it/s]

finished frames 1521600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 253741/1666666 [40:52<1:49:33, 214.94it/s]

finished frames 1522200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 253828/1666666 [40:52<1:51:33, 211.08it/s]

finished frames 1522800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 253938/1666666 [40:53<1:49:44, 214.54it/s]

finished frames 1523400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 254026/1666666 [40:53<1:51:58, 210.26it/s]

finished frames 1524000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 254136/1666666 [40:53<1:49:18, 215.38it/s]

finished frames 1524600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 254224/1666666 [40:54<1:49:15, 215.46it/s]

finished frames 1525200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 254334/1666666 [40:54<1:48:54, 216.14it/s]

finished frames 1525800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 254444/1666666 [40:55<1:49:14, 215.47it/s]

finished frames 1526400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 254532/1666666 [40:55<1:48:40, 216.58it/s]

finished frames 1527000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 254642/1666666 [40:56<1:52:35, 209.00it/s]

finished frames 1527600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 254730/1666666 [40:56<1:56:30, 201.99it/s]

finished frames 1528200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 254837/1666666 [40:57<1:52:53, 208.42it/s]

finished frames 1528800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 254923/1666666 [40:57<1:52:22, 209.37it/s]

finished frames 1529400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 255028/1666666 [40:58<1:54:59, 204.60it/s]

finished frames 1530000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 255136/1666666 [40:58<1:52:33, 209.02it/s]

finished frames 1530600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 255243/1666666 [40:59<1:52:18, 209.45it/s]

finished frames 1531200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 255329/1666666 [40:59<1:52:17, 209.48it/s]

finished frames 1531800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 255437/1666666 [41:00<1:51:46, 210.44it/s]

finished frames 1532400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 255524/1666666 [41:00<1:51:57, 210.08it/s]

finished frames 1533000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 255632/1666666 [41:01<1:52:10, 209.64it/s]

finished frames 1533600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 255739/1666666 [41:01<1:52:01, 209.93it/s]

finished frames 1534200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 255825/1666666 [41:02<1:52:11, 209.59it/s]

finished frames 1534800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 255933/1666666 [41:02<1:51:54, 210.11it/s]

finished frames 1535400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 256040/1666666 [41:03<1:54:12, 205.85it/s]

finished frames 1536000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 256125/1666666 [41:03<1:52:37, 208.73it/s]

finished frames 1536600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 256230/1666666 [41:04<1:52:42, 208.56it/s]

finished frames 1537200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 256336/1666666 [41:04<1:52:20, 209.22it/s]

finished frames 1537800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 256442/1666666 [41:05<1:52:18, 209.29it/s]

finished frames 1538400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 256527/1666666 [41:05<1:52:27, 209.00it/s]

finished frames 1539000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 256633/1666666 [41:05<1:52:33, 208.78it/s]

finished frames 1539600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 256740/1666666 [41:06<1:52:14, 209.36it/s]

finished frames 1540200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 256826/1666666 [41:06<1:52:02, 209.71it/s]

finished frames 1540800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 256933/1666666 [41:07<1:59:07, 197.24it/s]

finished frames 1541400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 257018/1666666 [41:07<2:07:02, 184.93it/s]

finished frames 1542000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 257124/1666666 [41:08<1:55:15, 203.81it/s]

finished frames 1542600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 257232/1666666 [41:08<1:52:16, 209.22it/s]

finished frames 1543200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 257341/1666666 [41:09<1:51:34, 210.51it/s]

finished frames 1543800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 257429/1666666 [41:09<1:51:17, 211.04it/s]

finished frames 1544400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 257539/1666666 [41:10<1:51:16, 211.07it/s]

finished frames 1545000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 257626/1666666 [41:10<1:51:45, 210.13it/s]

finished frames 1545600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 257736/1666666 [41:11<1:51:41, 210.23it/s]

finished frames 1546200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 257824/1666666 [41:11<1:51:12, 211.13it/s]

finished frames 1546800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 257933/1666666 [41:12<1:51:36, 210.35it/s]

finished frames 1547400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 258039/1666666 [41:12<1:54:39, 204.77it/s]

finished frames 1548000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 258125/1666666 [41:13<1:52:31, 208.62it/s]

finished frames 1548600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 15%|█▌        | 258231/1666666 [41:13<1:52:18, 209.03it/s]

finished frames 1549200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 258339/1666666 [41:14<1:51:50, 209.88it/s]

finished frames 1549800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 258425/1666666 [41:14<1:51:12, 211.06it/s]

finished frames 1550400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 258534/1666666 [41:15<1:51:33, 210.38it/s]

finished frames 1551000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 258642/1666666 [41:15<1:51:21, 210.74it/s]

finished frames 1551600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 258729/1666666 [41:16<1:51:38, 210.17it/s]

finished frames 1552200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 258838/1666666 [41:16<1:51:39, 210.15it/s]

finished frames 1552800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 258925/1666666 [41:16<1:51:39, 210.13it/s]

finished frames 1553400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 259033/1666666 [41:17<1:54:03, 205.70it/s]

finished frames 1554000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 259140/1666666 [41:18<1:52:03, 209.35it/s]

finished frames 1554600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 259226/1666666 [41:18<1:51:52, 209.67it/s]

finished frames 1555200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 259332/1666666 [41:18<1:54:30, 204.85it/s]

finished frames 1555800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 259439/1666666 [41:19<1:55:30, 203.06it/s]

finished frames 1556400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 259525/1666666 [41:19<1:52:33, 208.36it/s]

finished frames 1557000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 259632/1666666 [41:20<1:51:49, 209.71it/s]

finished frames 1557600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 259739/1666666 [41:20<1:52:08, 209.09it/s]

finished frames 1558200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 259823/1666666 [41:21<1:52:54, 207.65it/s]

finished frames 1558800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 259928/1666666 [41:21<1:52:53, 207.69it/s]

finished frames 1559400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 260033/1666666 [41:22<1:54:59, 203.86it/s]

finished frames 1560000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 260138/1666666 [41:22<1:53:05, 207.30it/s]

finished frames 1560600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 260223/1666666 [41:23<1:52:32, 208.28it/s]

finished frames 1561200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 260330/1666666 [41:23<1:52:06, 209.06it/s]

finished frames 1561800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 260437/1666666 [41:24<1:51:50, 209.56it/s]

finished frames 1562400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 260543/1666666 [41:24<1:52:04, 209.11it/s]

finished frames 1563000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 260629/1666666 [41:25<1:51:44, 209.72it/s]

finished frames 1563600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 260736/1666666 [41:25<1:51:42, 209.78it/s]

finished frames 1564200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 260842/1666666 [41:26<1:51:52, 209.42it/s]

finished frames 1564800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 260926/1666666 [41:26<1:52:09, 208.90it/s]

finished frames 1565400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 261031/1666666 [41:27<1:54:54, 203.89it/s]

finished frames 1566000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 261137/1666666 [41:27<1:52:31, 208.16it/s]

finished frames 1566600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 261243/1666666 [41:28<1:51:56, 209.23it/s]

finished frames 1567200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 261329/1666666 [41:28<1:51:47, 209.51it/s]

finished frames 1567800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 261436/1666666 [41:29<1:51:43, 209.63it/s]

finished frames 1568400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 261543/1666666 [41:29<1:51:45, 209.54it/s]

finished frames 1569000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 261627/1666666 [41:30<1:57:02, 200.07it/s]

finished frames 1569600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 261733/1666666 [41:30<1:59:35, 195.80it/s]

finished frames 1570200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 261840/1666666 [41:31<1:52:56, 207.30it/s]

finished frames 1570800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 261926/1666666 [41:31<1:52:04, 208.90it/s]

finished frames 1571400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 262032/1666666 [41:32<1:54:27, 204.54it/s]

finished frames 1572000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 262137/1666666 [41:32<1:52:21, 208.36it/s]

finished frames 1572600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 262242/1666666 [41:33<1:52:14, 208.55it/s]

finished frames 1573200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 262328/1666666 [41:33<1:51:46, 209.40it/s]

finished frames 1573800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 262433/1666666 [41:33<1:52:11, 208.59it/s]

finished frames 1574400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 262540/1666666 [41:34<1:51:48, 209.30it/s]

finished frames 1575000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 262625/1666666 [41:34<1:52:11, 208.58it/s]

finished frames 1575600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 262731/1666666 [41:35<1:51:46, 209.34it/s]

finished frames 1576200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 262840/1666666 [41:35<1:50:26, 211.86it/s]

finished frames 1576800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 262928/1666666 [41:36<1:50:39, 211.41it/s]

finished frames 1577400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 263035/1666666 [41:36<1:54:01, 205.16it/s]

finished frames 1578000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 263141/1666666 [41:37<1:52:07, 208.62it/s]

finished frames 1578600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 263226/1666666 [41:37<1:51:57, 208.91it/s]

finished frames 1579200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 263332/1666666 [41:38<1:51:41, 209.42it/s]

finished frames 1579800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 263437/1666666 [41:38<1:51:46, 209.24it/s]

finished frames 1580400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 263524/1666666 [41:39<1:51:39, 209.45it/s]

finished frames 1581000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 263631/1666666 [41:39<1:51:33, 209.60it/s]

finished frames 1581600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 263739/1666666 [41:40<1:51:28, 209.77it/s]

finished frames 1582200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 263825/1666666 [41:40<1:51:44, 209.23it/s]

finished frames 1582800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 263930/1666666 [41:41<2:03:25, 189.42it/s]

finished frames 1583400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 264014/1666666 [41:41<1:57:54, 198.27it/s]

finished frames 1584000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 264140/1666666 [41:42<1:54:50, 203.55it/s]

finished frames 1584600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 264225/1666666 [41:42<1:52:45, 207.30it/s]

finished frames 1585200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 264330/1666666 [41:43<1:52:04, 208.53it/s]

finished frames 1585800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 264436/1666666 [41:43<1:51:45, 209.10it/s]

finished frames 1586400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 264543/1666666 [41:44<1:51:38, 209.33it/s]

finished frames 1587000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 264628/1666666 [41:44<1:51:51, 208.91it/s]

finished frames 1587600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 264733/1666666 [41:45<1:51:57, 208.69it/s]

finished frames 1588200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 264840/1666666 [41:45<1:51:28, 209.57it/s]

finished frames 1588800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 264926/1666666 [41:45<1:51:32, 209.46it/s]

finished frames 1589400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 265032/1666666 [41:46<1:53:54, 205.10it/s]

finished frames 1590000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 265140/1666666 [41:47<1:51:25, 209.64it/s]

finished frames 1590600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 265227/1666666 [41:47<1:51:12, 210.02it/s]

finished frames 1591200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 265336/1666666 [41:47<1:50:28, 211.40it/s]

finished frames 1591800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 265424/1666666 [41:48<1:50:23, 211.55it/s]

finished frames 1592400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 265532/1666666 [41:48<1:51:12, 209.99it/s]

finished frames 1593000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 265642/1666666 [41:49<1:50:19, 211.64it/s]

finished frames 1593600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 265730/1666666 [41:49<1:50:23, 211.52it/s]

finished frames 1594200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 265840/1666666 [41:50<1:50:39, 210.98it/s]

finished frames 1594800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 265928/1666666 [41:50<1:50:28, 211.32it/s]

finished frames 1595400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 266038/1666666 [41:51<1:52:12, 208.04it/s]

finished frames 1596000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 266125/1666666 [41:51<1:50:52, 210.52it/s]

finished frames 1596600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 266235/1666666 [41:52<1:50:36, 211.00it/s]

finished frames 1597200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 266323/1666666 [41:52<1:53:35, 205.47it/s]

finished frames 1597800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 266431/1666666 [41:53<2:00:12, 194.13it/s]

finished frames 1598400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 266539/1666666 [41:53<1:53:48, 205.05it/s]

finished frames 1599000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 266627/1666666 [41:54<1:49:31, 213.05it/s]

finished frames 1599600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 266738/1666666 [41:54<1:48:14, 215.54it/s]

finished frames 1600200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 266826/1666666 [41:55<1:57:49, 198.01it/s]

finished frames 1600800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 266935/1666666 [41:55<1:50:14, 211.60it/s]

finished frames 1601400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 267022/1666666 [41:56<1:54:02, 204.54it/s]

finished frames 1602000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 267129/1666666 [41:56<1:50:29, 211.09it/s]

finished frames 1602600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 267239/1666666 [41:57<1:52:02, 208.18it/s]

finished frames 1603200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 267326/1666666 [41:57<1:49:04, 213.82it/s]

finished frames 1603800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 267436/1666666 [41:58<1:47:44, 216.46it/s]

finished frames 1604400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 267524/1666666 [41:58<1:47:37, 216.67it/s]

finished frames 1605000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 267634/1666666 [41:58<1:47:30, 216.90it/s]

finished frames 1605600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 267744/1666666 [41:59<1:47:24, 217.06it/s]

finished frames 1606200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 267832/1666666 [41:59<1:47:12, 217.46it/s]

finished frames 1606800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 267942/1666666 [42:00<1:47:16, 217.32it/s]

finished frames 1607400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 268030/1666666 [42:00<1:49:41, 212.50it/s]

finished frames 1608000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 268140/1666666 [42:01<1:47:38, 216.55it/s]

finished frames 1608600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 268228/1666666 [42:01<1:47:19, 217.16it/s]

finished frames 1609200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 268338/1666666 [42:02<1:48:13, 215.35it/s]

finished frames 1609800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 268426/1666666 [42:02<1:48:35, 214.62it/s]

finished frames 1610400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 268536/1666666 [42:03<1:49:19, 213.13it/s]

finished frames 1611000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 268624/1666666 [42:03<1:49:48, 212.19it/s]

finished frames 1611600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 268734/1666666 [42:04<1:54:07, 204.16it/s]

finished frames 1612200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 268840/1666666 [42:04<1:55:20, 201.99it/s]

finished frames 1612800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 268926/1666666 [42:05<1:52:13, 207.59it/s]

finished frames 1613400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 269032/1666666 [42:05<1:53:58, 204.38it/s]

finished frames 1614000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 269140/1666666 [42:06<1:51:26, 209.01it/s]

finished frames 1614600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 269226/1666666 [42:06<1:51:05, 209.64it/s]

finished frames 1615200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 269333/1666666 [42:07<1:50:46, 210.22it/s]

finished frames 1615800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 269443/1666666 [42:07<1:50:34, 210.59it/s]

finished frames 1616400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 269531/1666666 [42:07<1:50:27, 210.80it/s]

finished frames 1617000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 269641/1666666 [42:08<1:50:41, 210.34it/s]

finished frames 1617600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 269729/1666666 [42:08<1:50:21, 210.97it/s]

finished frames 1618200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 269839/1666666 [42:09<1:50:01, 211.59it/s]

finished frames 1618800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 269927/1666666 [42:09<1:50:28, 210.71it/s]

finished frames 1619400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 270036/1666666 [42:10<1:53:16, 205.48it/s]

finished frames 1620000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 270122/1666666 [42:10<1:51:39, 208.44it/s]

finished frames 1620600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 270230/1666666 [42:11<1:50:47, 210.06it/s]

finished frames 1621200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 270337/1666666 [42:11<1:51:06, 209.46it/s]

finished frames 1621800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 270442/1666666 [42:12<1:51:25, 208.86it/s]

finished frames 1622400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 270528/1666666 [42:12<1:51:17, 209.09it/s]

finished frames 1623000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 270634/1666666 [42:13<1:51:15, 209.13it/s]

finished frames 1623600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 270740/1666666 [42:13<1:51:10, 209.26it/s]

finished frames 1624200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▌        | 270824/1666666 [42:14<1:51:22, 208.88it/s]

finished frames 1624800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▋        | 270932/1666666 [42:14<1:50:35, 210.35it/s]

finished frames 1625400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▋        | 271039/1666666 [42:15<1:57:51, 197.36it/s]

finished frames 1626000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▋        | 271123/1666666 [42:15<2:00:10, 193.53it/s]

finished frames 1626600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▋        | 271228/1666666 [42:16<1:53:33, 204.81it/s]

finished frames 1627200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▋        | 271334/1666666 [42:16<1:51:56, 207.75it/s]

finished frames 1627800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▋        | 271443/1666666 [42:17<1:50:54, 209.68it/s]

finished frames 1628400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▋        | 271529/1666666 [42:17<1:50:00, 211.36it/s]

finished frames 1629000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▋        | 271639/1666666 [42:18<1:48:05, 215.10it/s]

finished frames 1629600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▋        | 271727/1666666 [42:18<1:48:00, 215.24it/s]

finished frames 1630200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▋        | 271837/1666666 [42:19<1:47:45, 215.73it/s]

finished frames 1630800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▋        | 271925/1666666 [42:19<1:47:39, 215.92it/s]

finished frames 1631400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▋        | 272035/1666666 [42:19<1:49:55, 211.47it/s]

finished frames 1632000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▋        | 272123/1666666 [42:20<1:48:21, 214.50it/s]

finished frames 1632600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▋        | 272233/1666666 [42:20<1:47:44, 215.69it/s]

finished frames 1633200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▋        | 272343/1666666 [42:21<1:48:30, 214.17it/s]

finished frames 1633800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▋        | 272431/1666666 [42:21<1:47:50, 215.47it/s]

finished frames 1634400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▋        | 272541/1666666 [42:22<1:47:33, 216.04it/s]

finished frames 1635000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▋        | 272629/1666666 [42:22<1:47:57, 215.20it/s]

finished frames 1635600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▋        | 272739/1666666 [42:23<1:47:47, 215.52it/s]

finished frames 1636200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▋        | 272827/1666666 [42:23<1:47:54, 215.27it/s]

finished frames 1636800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▋        | 272937/1666666 [42:24<1:47:30, 216.07it/s]

finished frames 1637400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▋        | 273025/1666666 [42:24<1:50:18, 210.58it/s]

finished frames 1638000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▋        | 273135/1666666 [42:25<1:49:11, 212.70it/s]

finished frames 1638600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▋        | 273223/1666666 [42:25<1:49:22, 212.34it/s]

finished frames 1639200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▋        | 273333/1666666 [42:26<1:49:30, 212.05it/s]

finished frames 1639800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▋        | 273443/1666666 [42:26<1:52:27, 206.47it/s]

finished frames 1640400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▋        | 273530/1666666 [42:26<1:50:09, 210.78it/s]

finished frames 1641000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▋        | 273640/1666666 [42:27<1:48:41, 213.59it/s]

finished frames 1641600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▋        | 273728/1666666 [42:27<1:49:05, 212.81it/s]

finished frames 1642200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▋        | 273838/1666666 [42:28<1:49:04, 212.83it/s]

finished frames 1642800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▋        | 273926/1666666 [42:28<1:49:03, 212.85it/s]

finished frames 1643400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▋        | 274035/1666666 [42:29<1:52:55, 205.54it/s]

finished frames 1644000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▋        | 274143/1666666 [42:29<1:50:41, 209.67it/s]

finished frames 1644600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▋        | 274229/1666666 [42:30<1:50:16, 210.44it/s]

finished frames 1645200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▋        | 274336/1666666 [42:30<1:50:43, 209.58it/s]

finished frames 1645800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▋        | 274442/1666666 [42:31<1:50:44, 209.54it/s]

finished frames 1646400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▋        | 274528/1666666 [42:31<1:50:25, 210.11it/s]

finished frames 1647000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▋        | 274635/1666666 [42:32<1:50:43, 209.52it/s]

finished frames 1647600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▋        | 274741/1666666 [42:32<1:50:39, 209.63it/s]

finished frames 1648200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▋        | 274827/1666666 [42:33<1:50:33, 209.83it/s]

finished frames 1648800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 16%|█▋        | 274935/1666666 [42:33<1:50:12, 210.47it/s]

finished frames 1649400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 275021/1666666 [42:34<1:55:10, 201.38it/s]

finished frames 1650000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 275127/1666666 [42:34<1:51:31, 207.96it/s]

finished frames 1650600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 275234/1666666 [42:35<1:50:49, 209.26it/s]

finished frames 1651200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 275339/1666666 [42:35<1:50:57, 209.00it/s]

finished frames 1651800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 275424/1666666 [42:36<1:50:51, 209.18it/s]

finished frames 1652400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 275529/1666666 [42:36<1:50:59, 208.89it/s]

finished frames 1653000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 275636/1666666 [42:37<1:50:19, 210.13it/s]

finished frames 1653600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 275724/1666666 [42:37<2:08:34, 180.31it/s]

finished frames 1654200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 275832/1666666 [42:38<1:52:48, 205.48it/s]

finished frames 1654800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 275939/1666666 [42:38<1:50:58, 208.85it/s]

finished frames 1655400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 276023/1666666 [42:38<1:54:18, 202.75it/s]

finished frames 1656000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 276132/1666666 [42:39<1:50:22, 209.98it/s]

finished frames 1656600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 276241/1666666 [42:40<1:49:58, 210.72it/s]

finished frames 1657200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 276328/1666666 [42:40<1:50:04, 210.52it/s]

finished frames 1657800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 276436/1666666 [42:40<1:49:47, 211.06it/s]

finished frames 1658400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 276524/1666666 [42:41<1:49:44, 211.11it/s]

finished frames 1659000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 276634/1666666 [42:41<1:49:56, 210.72it/s]

finished frames 1659600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 276743/1666666 [42:42<1:49:57, 210.69it/s]

finished frames 1660200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 276829/1666666 [42:42<1:50:26, 209.74it/s]

finished frames 1660800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 276936/1666666 [42:43<1:50:23, 209.82it/s]

finished frames 1661400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 277020/1666666 [42:43<1:54:34, 202.14it/s]

finished frames 1662000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 277128/1666666 [42:44<1:51:09, 208.33it/s]

finished frames 1662600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 277236/1666666 [42:44<1:49:39, 211.19it/s]

finished frames 1663200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 277324/1666666 [42:45<1:48:45, 212.92it/s]

finished frames 1663800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 277434/1666666 [42:45<1:48:20, 213.71it/s]

finished frames 1664400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 277544/1666666 [42:46<1:47:18, 215.76it/s]

finished frames 1665000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 277632/1666666 [42:46<1:47:36, 215.14it/s]

finished frames 1665600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 277742/1666666 [42:47<1:48:25, 213.51it/s]

finished frames 1666200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 277830/1666666 [42:47<1:48:04, 214.16it/s]

finished frames 1666800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 277940/1666666 [42:48<1:48:04, 214.16it/s]

finished frames 1667400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 278028/1666666 [42:48<1:50:41, 209.09it/s]

finished frames 1668000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 278138/1666666 [42:49<1:52:36, 205.52it/s]

finished frames 1668600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 278226/1666666 [42:49<1:55:15, 200.76it/s]

finished frames 1669200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 278336/1666666 [42:49<1:48:50, 212.60it/s]

finished frames 1669800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 278424/1666666 [42:50<1:47:41, 214.86it/s]

finished frames 1670400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 278534/1666666 [42:50<1:46:54, 216.41it/s]

finished frames 1671000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 278644/1666666 [42:51<1:46:47, 216.64it/s]

finished frames 1671600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 278732/1666666 [42:51<1:46:52, 216.46it/s]

finished frames 1672200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 278842/1666666 [42:52<1:46:55, 216.31it/s]

finished frames 1672800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 278930/1666666 [42:52<1:46:56, 216.26it/s]

finished frames 1673400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 279040/1666666 [42:53<1:49:53, 210.46it/s]

finished frames 1674000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 279128/1666666 [42:53<1:48:50, 212.47it/s]

finished frames 1674600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 279238/1666666 [42:54<1:48:28, 213.16it/s]

finished frames 1675200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 279326/1666666 [42:54<1:48:41, 212.73it/s]

finished frames 1675800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 279436/1666666 [42:55<1:48:19, 213.42it/s]

finished frames 1676400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 279524/1666666 [42:55<1:48:55, 212.26it/s]

finished frames 1677000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 279634/1666666 [42:56<1:48:10, 213.70it/s]

finished frames 1677600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 279744/1666666 [42:56<1:48:19, 213.40it/s]

finished frames 1678200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 279832/1666666 [42:57<1:48:10, 213.68it/s]

finished frames 1678800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 279942/1666666 [42:57<1:47:52, 214.26it/s]

finished frames 1679400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 280030/1666666 [42:57<1:50:51, 208.47it/s]

finished frames 1680000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 280139/1666666 [42:58<1:48:39, 212.66it/s]

finished frames 1680600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 280227/1666666 [42:58<1:48:26, 213.10it/s]

finished frames 1681200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 280337/1666666 [42:59<1:48:21, 213.24it/s]

finished frames 1681800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 280425/1666666 [42:59<1:48:22, 213.19it/s]

finished frames 1682400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 280535/1666666 [43:00<1:48:08, 213.63it/s]

finished frames 1683000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 280623/1666666 [43:00<1:48:40, 212.57it/s]

finished frames 1683600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 280733/1666666 [43:01<1:49:55, 210.12it/s]

finished frames 1684200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 280840/1666666 [43:01<1:50:12, 209.59it/s]

finished frames 1684800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 280925/1666666 [43:02<1:50:29, 209.02it/s]

finished frames 1685400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 281031/1666666 [43:02<1:53:14, 203.94it/s]

finished frames 1686000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 281137/1666666 [43:03<1:50:52, 208.27it/s]

finished frames 1686600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 281243/1666666 [43:03<1:50:29, 208.97it/s]

finished frames 1687200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 281328/1666666 [43:04<1:50:37, 208.70it/s]

finished frames 1687800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 281435/1666666 [43:04<1:50:17, 209.34it/s]

finished frames 1688400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 281541/1666666 [43:05<1:50:24, 209.10it/s]

finished frames 1689000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 281626/1666666 [43:05<1:50:39, 208.61it/s]

finished frames 1689600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 281734/1666666 [43:06<1:50:21, 209.16it/s]

finished frames 1690200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 281840/1666666 [43:06<1:50:27, 208.96it/s]

finished frames 1690800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 281925/1666666 [43:07<1:50:38, 208.59it/s]

finished frames 1691400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 282030/1666666 [43:07<1:53:07, 204.00it/s]

finished frames 1692000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 282138/1666666 [43:08<1:50:23, 209.04it/s]

finished frames 1692600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 282223/1666666 [43:08<1:50:33, 208.71it/s]

finished frames 1693200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 282329/1666666 [43:08<1:50:27, 208.86it/s]

finished frames 1693800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 282435/1666666 [43:09<1:50:32, 208.71it/s]

finished frames 1694400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 282540/1666666 [43:09<1:50:30, 208.75it/s]

finished frames 1695000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 282625/1666666 [43:10<1:50:38, 208.47it/s]

finished frames 1695600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 282731/1666666 [43:10<1:50:32, 208.66it/s]

finished frames 1696200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 282838/1666666 [43:11<1:50:23, 208.92it/s]

finished frames 1696800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 282923/1666666 [43:11<1:50:29, 208.74it/s]

finished frames 1697400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 283028/1666666 [43:12<1:53:09, 203.78it/s]

finished frames 1698000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 283135/1666666 [43:12<1:50:37, 208.43it/s]

finished frames 1698600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 283240/1666666 [43:13<1:50:24, 208.84it/s]

finished frames 1699200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 283324/1666666 [43:13<1:50:31, 208.61it/s]

finished frames 1699800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 283430/1666666 [43:14<1:50:29, 208.66it/s]

finished frames 1700400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 283537/1666666 [43:14<1:50:07, 209.33it/s]

finished frames 1701000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 283622/1666666 [43:15<1:50:26, 208.72it/s]

finished frames 1701600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 283730/1666666 [43:15<1:50:13, 209.09it/s]

finished frames 1702200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 283836/1666666 [43:16<1:50:14, 209.05it/s]

finished frames 1702800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 283942/1666666 [43:16<1:50:12, 209.11it/s]

finished frames 1703400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 284026/1666666 [43:17<1:52:54, 204.10it/s]

finished frames 1704000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 284132/1666666 [43:17<1:50:49, 207.91it/s]

finished frames 1704600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 284238/1666666 [43:18<1:50:17, 208.89it/s]

finished frames 1705200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 284343/1666666 [43:18<1:50:24, 208.67it/s]

finished frames 1705800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 284428/1666666 [43:19<1:50:27, 208.55it/s]

finished frames 1706400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 284535/1666666 [43:19<1:50:04, 209.27it/s]

finished frames 1707000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 284641/1666666 [43:20<1:50:14, 208.93it/s]

finished frames 1707600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 284727/1666666 [43:20<1:50:14, 208.91it/s]

finished frames 1708200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 284833/1666666 [43:21<1:50:09, 209.07it/s]

finished frames 1708800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 284941/1666666 [43:21<1:49:44, 209.85it/s]

finished frames 1709400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 285025/1666666 [43:21<1:52:44, 204.26it/s]

finished frames 1710000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 285130/1666666 [43:22<1:50:51, 207.70it/s]

finished frames 1710600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 285236/1666666 [43:22<1:53:32, 202.79it/s]

finished frames 1711200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 285341/1666666 [43:23<1:57:48, 195.42it/s]

finished frames 1711800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 285428/1666666 [43:23<1:51:37, 206.24it/s]

finished frames 1712400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 285536/1666666 [43:24<1:49:41, 209.84it/s]

finished frames 1713000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 285624/1666666 [43:24<1:48:57, 211.24it/s]

finished frames 1713600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 285734/1666666 [43:25<1:48:50, 211.47it/s]

finished frames 1714200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 285822/1666666 [43:25<1:48:41, 211.72it/s]

finished frames 1714800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 285932/1666666 [43:26<1:49:00, 211.12it/s]

finished frames 1715400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 286020/1666666 [43:26<1:52:07, 205.21it/s]

finished frames 1716000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 286129/1666666 [43:27<1:49:34, 210.00it/s]

finished frames 1716600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 286236/1666666 [43:27<1:49:38, 209.83it/s]

finished frames 1717200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 286323/1666666 [43:28<1:49:16, 210.53it/s]

finished frames 1717800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 286432/1666666 [43:28<1:49:26, 210.21it/s]

finished frames 1718400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 286539/1666666 [43:29<1:49:59, 209.13it/s]

finished frames 1719000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 286623/1666666 [43:29<1:50:35, 207.97it/s]

finished frames 1719600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 286728/1666666 [43:30<1:50:39, 207.84it/s]

finished frames 1720200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 286833/1666666 [43:30<1:50:34, 207.98it/s]

finished frames 1720800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 286938/1666666 [43:31<1:50:30, 208.09it/s]

finished frames 1721400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 287022/1666666 [43:31<1:53:11, 203.13it/s]

finished frames 1722000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 287129/1666666 [43:32<1:50:26, 208.17it/s]

finished frames 1722600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 287237/1666666 [43:32<1:49:23, 210.16it/s]

finished frames 1723200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 287324/1666666 [43:33<1:49:06, 210.69it/s]

finished frames 1723800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 287432/1666666 [43:33<1:49:33, 209.80it/s]

finished frames 1724400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 287538/1666666 [43:34<1:55:17, 199.37it/s]

finished frames 1725000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 287623/1666666 [43:34<2:00:46, 190.29it/s]

finished frames 1725600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 287731/1666666 [43:35<1:51:22, 206.34it/s]

finished frames 1726200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 287839/1666666 [43:35<1:49:46, 209.33it/s]

finished frames 1726800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 287924/1666666 [43:36<1:49:54, 209.09it/s]

finished frames 1727400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 288030/1666666 [43:36<1:51:58, 205.19it/s]

finished frames 1728000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 288138/1666666 [43:37<1:49:42, 209.42it/s]

finished frames 1728600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 288225/1666666 [43:37<1:49:19, 210.13it/s]

finished frames 1729200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 288334/1666666 [43:37<1:49:17, 210.19it/s]

finished frames 1729800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 288442/1666666 [43:38<1:49:17, 210.18it/s]

finished frames 1730400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 288530/1666666 [43:38<1:49:14, 210.25it/s]

finished frames 1731000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 288640/1666666 [43:39<1:47:47, 213.08it/s]

finished frames 1731600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 288728/1666666 [43:39<1:47:24, 213.83it/s]

finished frames 1732200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 288838/1666666 [43:40<1:47:08, 214.32it/s]

finished frames 1732800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 288926/1666666 [43:40<1:47:12, 214.18it/s]

finished frames 1733400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 289036/1666666 [43:41<1:50:05, 208.57it/s]

finished frames 1734000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 289123/1666666 [43:41<1:48:09, 212.27it/s]

finished frames 1734600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 289233/1666666 [43:42<1:47:24, 213.74it/s]

finished frames 1735200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 289343/1666666 [43:42<1:47:04, 214.38it/s]

finished frames 1735800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 289431/1666666 [43:43<1:47:10, 214.17it/s]

finished frames 1736400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 289541/1666666 [43:43<1:46:56, 214.61it/s]

finished frames 1737000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 289629/1666666 [43:44<1:47:09, 214.19it/s]

finished frames 1737600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 289739/1666666 [43:44<1:47:00, 214.45it/s]

finished frames 1738200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 289827/1666666 [43:44<1:46:43, 215.01it/s]

finished frames 1738800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 289937/1666666 [43:45<1:52:20, 204.24it/s]

finished frames 1739400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 290023/1666666 [43:45<1:51:19, 206.09it/s]

finished frames 1740000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 290131/1666666 [43:46<1:49:34, 209.37it/s]

finished frames 1740600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 290239/1666666 [43:47<1:49:12, 210.06it/s]

finished frames 1741200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 290326/1666666 [43:47<1:49:08, 210.17it/s]

finished frames 1741800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 290434/1666666 [43:47<1:49:11, 210.06it/s]

finished frames 1742400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 290543/1666666 [43:48<1:49:02, 210.33it/s]

finished frames 1743000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 290631/1666666 [43:48<1:48:56, 210.53it/s]

finished frames 1743600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 290740/1666666 [43:49<1:48:34, 211.20it/s]

finished frames 1744200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 290827/1666666 [43:49<1:48:56, 210.48it/s]

finished frames 1744800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 290936/1666666 [43:50<1:48:41, 210.94it/s]

finished frames 1745400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 291022/1666666 [43:50<1:51:16, 206.03it/s]

finished frames 1746000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 291130/1666666 [43:51<1:49:14, 209.87it/s]

finished frames 1746600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 291238/1666666 [43:51<1:48:50, 210.61it/s]

finished frames 1747200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 291323/1666666 [43:52<1:50:10, 208.07it/s]

finished frames 1747800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 291430/1666666 [43:52<1:49:39, 209.01it/s]

finished frames 1748400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 291536/1666666 [43:53<1:49:41, 208.94it/s]

finished frames 1749000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 17%|█▋        | 291642/1666666 [43:53<1:50:32, 207.31it/s]

finished frames 1749600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 291730/1666666 [43:54<1:47:17, 213.59it/s]

finished frames 1750200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 291840/1666666 [43:54<1:47:41, 212.78it/s]

finished frames 1750800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 291927/1666666 [43:55<1:49:39, 208.94it/s]

finished frames 1751400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 292033/1666666 [43:55<1:53:40, 201.55it/s]

finished frames 1752000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 292138/1666666 [43:56<1:51:15, 205.92it/s]

finished frames 1752600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 292222/1666666 [43:56<1:59:28, 191.74it/s]

finished frames 1753200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 292333/1666666 [43:57<1:47:29, 213.09it/s]

finished frames 1753800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 292443/1666666 [43:57<1:46:39, 214.73it/s]

finished frames 1754400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 292531/1666666 [43:57<1:45:49, 216.42it/s]

finished frames 1755000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 292642/1666666 [43:58<1:44:59, 218.12it/s]

finished frames 1755600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 292730/1666666 [43:58<1:45:06, 217.87it/s]

finished frames 1756200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 292840/1666666 [43:59<1:45:09, 217.74it/s]

finished frames 1756800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 292928/1666666 [43:59<1:45:48, 216.39it/s]

finished frames 1757400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 293038/1666666 [44:00<1:48:43, 210.57it/s]

finished frames 1758000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 293126/1666666 [44:00<1:46:59, 213.96it/s]

finished frames 1758600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 293236/1666666 [44:01<1:46:58, 213.96it/s]

finished frames 1759200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 293324/1666666 [44:01<1:46:50, 214.23it/s]

finished frames 1759800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 293434/1666666 [44:02<1:46:58, 213.95it/s]

finished frames 1760400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 293544/1666666 [44:02<1:46:15, 215.37it/s]

finished frames 1761000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 293632/1666666 [44:03<1:46:22, 215.14it/s]

finished frames 1761600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 293742/1666666 [44:03<1:46:19, 215.21it/s]

finished frames 1762200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 293830/1666666 [44:04<1:46:17, 215.26it/s]

finished frames 1762800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 293940/1666666 [44:04<1:46:30, 214.81it/s]

finished frames 1763400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 294028/1666666 [44:04<1:49:22, 209.17it/s]

finished frames 1764000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 294138/1666666 [44:05<1:46:55, 213.95it/s]

finished frames 1764600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 294226/1666666 [44:05<1:46:43, 214.34it/s]

finished frames 1765200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 294336/1666666 [44:06<1:46:28, 214.80it/s]

finished frames 1765800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 294424/1666666 [44:06<1:46:31, 214.69it/s]

finished frames 1766400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 294534/1666666 [44:07<1:48:17, 211.18it/s]

finished frames 1767000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 294620/1666666 [44:07<2:18:07, 165.55it/s]

finished frames 1767600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 294729/1666666 [44:08<1:52:11, 203.82it/s]

finished frames 1768200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 294839/1666666 [44:08<1:47:11, 213.31it/s]

finished frames 1768800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 294927/1666666 [44:09<1:46:52, 213.92it/s]

finished frames 1769400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 295037/1666666 [44:09<1:49:10, 209.39it/s]

finished frames 1770000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 295125/1666666 [44:10<1:46:43, 214.18it/s]

finished frames 1770600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 295235/1666666 [44:10<1:45:34, 216.51it/s]

finished frames 1771200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 295323/1666666 [44:11<1:46:43, 214.16it/s]

finished frames 1771800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 295433/1666666 [44:11<1:46:26, 214.69it/s]

finished frames 1772400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 295543/1666666 [44:12<1:46:05, 215.40it/s]

finished frames 1773000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 295631/1666666 [44:12<1:46:08, 215.29it/s]

finished frames 1773600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 295741/1666666 [44:13<1:45:48, 215.94it/s]

finished frames 1774200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 295829/1666666 [44:13<1:45:47, 215.96it/s]

finished frames 1774800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 295939/1666666 [44:14<1:45:42, 216.10it/s]

finished frames 1775400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 296027/1666666 [44:14<1:48:14, 211.04it/s]

finished frames 1776000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 296137/1666666 [44:14<1:46:11, 215.10it/s]

finished frames 1776600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 296225/1666666 [44:15<1:45:49, 215.85it/s]

finished frames 1777200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 296335/1666666 [44:15<1:45:52, 215.73it/s]

finished frames 1777800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 296423/1666666 [44:16<1:46:12, 215.02it/s]

finished frames 1778400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 296533/1666666 [44:16<1:45:57, 215.51it/s]

finished frames 1779000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 296643/1666666 [44:17<1:45:07, 217.22it/s]

finished frames 1779600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 296731/1666666 [44:17<1:45:54, 215.58it/s]

finished frames 1780200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 296841/1666666 [44:18<1:45:41, 216.02it/s]

finished frames 1780800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 296929/1666666 [44:18<1:45:52, 215.64it/s]

finished frames 1781400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 297017/1666666 [44:19<1:57:49, 193.75it/s]

finished frames 1782000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 297127/1666666 [44:19<1:48:01, 211.31it/s]

finished frames 1782600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 297237/1666666 [44:20<1:48:17, 210.77it/s]

finished frames 1783200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 297325/1666666 [44:20<1:46:29, 214.32it/s]

finished frames 1783800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 297435/1666666 [44:21<1:45:59, 215.30it/s]

finished frames 1784400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 297523/1666666 [44:21<1:45:45, 215.78it/s]

finished frames 1785000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 297633/1666666 [44:21<1:45:49, 215.63it/s]

finished frames 1785600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 297743/1666666 [44:22<1:45:53, 215.45it/s]

finished frames 1786200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 297831/1666666 [44:22<1:45:52, 215.47it/s]

finished frames 1786800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 297941/1666666 [44:23<1:45:30, 216.19it/s]

finished frames 1787400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 298029/1666666 [44:23<1:47:58, 211.24it/s]

finished frames 1788000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 298139/1666666 [44:24<1:46:12, 214.76it/s]

finished frames 1788600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 298227/1666666 [44:24<1:45:55, 215.30it/s]

finished frames 1789200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 298337/1666666 [44:25<1:45:44, 215.66it/s]

finished frames 1789800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 298425/1666666 [44:25<1:45:52, 215.38it/s]

finished frames 1790400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 298535/1666666 [44:26<1:45:55, 215.28it/s]

finished frames 1791000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 298623/1666666 [44:26<1:45:37, 215.86it/s]

finished frames 1791600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 298733/1666666 [44:27<1:45:36, 215.87it/s]

finished frames 1792200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 298843/1666666 [44:27<1:45:47, 215.48it/s]

finished frames 1792800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 298931/1666666 [44:28<1:45:46, 215.51it/s]

finished frames 1793400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 299041/1666666 [44:28<1:48:03, 210.95it/s]

finished frames 1794000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 299129/1666666 [44:28<1:46:20, 214.34it/s]

finished frames 1794600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 299239/1666666 [44:29<1:45:51, 215.30it/s]

finished frames 1795200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 299327/1666666 [44:29<1:45:45, 215.47it/s]

finished frames 1795800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 299437/1666666 [44:30<1:55:00, 198.15it/s]

finished frames 1796400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 299524/1666666 [44:30<1:48:00, 210.95it/s]

finished frames 1797000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 299634/1666666 [44:31<1:46:17, 214.37it/s]

finished frames 1797600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 299744/1666666 [44:31<1:45:55, 215.06it/s]

finished frames 1798200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 299832/1666666 [44:32<1:45:48, 215.30it/s]

finished frames 1798800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 299942/1666666 [44:32<1:45:36, 215.70it/s]

finished frames 1799400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 300030/1666666 [44:33<1:47:53, 211.12it/s]

finished frames 1800000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 300140/1666666 [44:33<1:45:13, 216.44it/s]

finished frames 1800600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 300228/1666666 [44:34<1:44:51, 217.17it/s]

finished frames 1801200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 300338/1666666 [44:34<1:46:36, 213.61it/s]

finished frames 1801800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 300426/1666666 [44:35<1:46:05, 214.63it/s]

finished frames 1802400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 300536/1666666 [44:35<1:45:32, 215.74it/s]

finished frames 1803000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 300624/1666666 [44:35<1:46:55, 212.91it/s]

finished frames 1803600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 300734/1666666 [44:36<1:47:13, 212.32it/s]

finished frames 1804200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 300822/1666666 [44:36<1:47:12, 212.34it/s]

finished frames 1804800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 300932/1666666 [44:37<1:47:09, 212.41it/s]

finished frames 1805400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 301020/1666666 [44:37<1:50:28, 206.01it/s]

finished frames 1806000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 301129/1666666 [44:38<1:47:35, 211.53it/s]

finished frames 1806600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 301239/1666666 [44:38<1:47:06, 212.46it/s]

finished frames 1807200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 301327/1666666 [44:39<1:47:17, 212.08it/s]

finished frames 1807800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 301437/1666666 [44:39<1:46:47, 213.08it/s]

finished frames 1808400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 301525/1666666 [44:40<1:47:11, 212.27it/s]

finished frames 1809000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 301635/1666666 [44:40<1:47:01, 212.58it/s]

finished frames 1809600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 301723/1666666 [44:41<1:47:12, 212.18it/s]

finished frames 1810200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 301833/1666666 [44:41<1:56:22, 195.48it/s]

finished frames 1810800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 301942/1666666 [44:42<1:48:41, 209.26it/s]

finished frames 1811400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 302028/1666666 [44:42<1:49:54, 206.93it/s]

finished frames 1812000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 302137/1666666 [44:43<1:46:47, 212.94it/s]

finished frames 1812600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 302225/1666666 [44:43<1:46:22, 213.77it/s]

finished frames 1813200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 302335/1666666 [44:44<1:46:33, 213.39it/s]

finished frames 1813800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 302423/1666666 [44:44<1:47:22, 211.77it/s]

finished frames 1814400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 302533/1666666 [44:45<1:46:53, 212.68it/s]

finished frames 1815000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 302643/1666666 [44:45<1:46:59, 212.48it/s]

finished frames 1815600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 302731/1666666 [44:45<1:47:17, 211.88it/s]

finished frames 1816200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 302841/1666666 [44:46<1:47:11, 212.06it/s]

finished frames 1816800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 302929/1666666 [44:46<1:46:55, 212.57it/s]

finished frames 1817400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 303036/1666666 [44:47<1:53:43, 199.84it/s]

finished frames 1818000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 303142/1666666 [44:47<1:49:28, 207.60it/s]

finished frames 1818600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 303229/1666666 [44:48<1:48:28, 209.50it/s]

finished frames 1819200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 303337/1666666 [44:48<1:47:27, 211.46it/s]

finished frames 1819800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 303425/1666666 [44:49<1:47:03, 212.21it/s]

finished frames 1820400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 303535/1666666 [44:49<1:46:16, 213.79it/s]

finished frames 1821000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 303623/1666666 [44:50<1:45:52, 214.57it/s]

finished frames 1821600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 303733/1666666 [44:50<1:45:45, 214.80it/s]

finished frames 1822200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 303843/1666666 [44:51<1:45:51, 214.57it/s]

finished frames 1822800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 303931/1666666 [44:51<1:45:57, 214.33it/s]

finished frames 1823400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 304041/1666666 [44:52<1:48:23, 209.53it/s]

finished frames 1824000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 304128/1666666 [44:52<1:46:27, 213.31it/s]

finished frames 1824600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 304238/1666666 [44:53<1:45:51, 214.50it/s]

finished frames 1825200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 304326/1666666 [44:53<1:45:29, 215.25it/s]

finished frames 1825800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 304436/1666666 [44:54<1:45:51, 214.49it/s]

finished frames 1826400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 304524/1666666 [44:54<1:45:52, 214.44it/s]

finished frames 1827000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 304634/1666666 [44:54<1:45:53, 214.38it/s]

finished frames 1827600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 304722/1666666 [44:55<1:46:19, 213.50it/s]

finished frames 1828200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 304832/1666666 [44:55<1:46:09, 213.81it/s]

finished frames 1828800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 304942/1666666 [44:56<1:46:48, 212.47it/s]

finished frames 1829400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 305030/1666666 [44:56<1:49:17, 207.66it/s]

finished frames 1830000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 305139/1666666 [44:57<1:47:05, 211.90it/s]

finished frames 1830600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 305227/1666666 [44:57<1:46:55, 212.22it/s]

finished frames 1831200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 305337/1666666 [44:58<1:46:50, 212.36it/s]

finished frames 1831800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 305425/1666666 [44:58<1:46:57, 212.13it/s]

finished frames 1832400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 305535/1666666 [44:59<1:46:51, 212.29it/s]

finished frames 1833000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 305623/1666666 [44:59<1:46:54, 212.19it/s]

finished frames 1833600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 305733/1666666 [45:00<1:47:02, 211.89it/s]

finished frames 1834200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 305843/1666666 [45:00<1:46:53, 212.18it/s]

finished frames 1834800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 305931/1666666 [45:01<1:46:58, 212.01it/s]

finished frames 1835400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 306040/1666666 [45:01<1:49:56, 206.28it/s]

finished frames 1836000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 306124/1666666 [45:01<1:49:13, 207.62it/s]

finished frames 1836600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 306229/1666666 [45:02<1:50:48, 204.63it/s]

finished frames 1837200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 306338/1666666 [45:03<1:46:47, 212.32it/s]

finished frames 1837800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 306426/1666666 [45:03<1:46:44, 212.40it/s]

finished frames 1838400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 306536/1666666 [45:03<1:45:59, 213.89it/s]

finished frames 1839000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 306624/1666666 [45:04<1:46:10, 213.49it/s]

finished frames 1839600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 306734/1666666 [45:04<1:46:09, 213.51it/s]

finished frames 1840200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 306822/1666666 [45:05<1:46:07, 213.57it/s]

finished frames 1840800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 306933/1666666 [45:05<1:44:52, 216.09it/s]

finished frames 1841400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 307021/1666666 [45:06<1:47:54, 210.00it/s]

finished frames 1842000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 307131/1666666 [45:06<1:45:43, 214.30it/s]

finished frames 1842600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 307241/1666666 [45:07<1:44:50, 216.11it/s]

finished frames 1843200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 307329/1666666 [45:07<1:44:48, 216.15it/s]

finished frames 1843800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 307439/1666666 [45:08<1:44:41, 216.37it/s]

finished frames 1844400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 307527/1666666 [45:08<1:44:49, 216.11it/s]

finished frames 1845000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 307637/1666666 [45:09<1:44:52, 215.97it/s]

finished frames 1845600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 307725/1666666 [45:09<1:44:47, 216.13it/s]

finished frames 1846200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 307835/1666666 [45:10<1:44:47, 216.13it/s]

finished frames 1846800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 307923/1666666 [45:10<1:44:35, 216.52it/s]

finished frames 1847400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 308033/1666666 [45:10<1:47:17, 211.05it/s]

finished frames 1848000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 308143/1666666 [45:11<1:45:11, 215.25it/s]

finished frames 1848600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 18%|█▊        | 308231/1666666 [45:11<1:45:19, 214.97it/s]

finished frames 1849200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▊        | 308341/1666666 [45:12<1:44:57, 215.69it/s]

finished frames 1849800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▊        | 308429/1666666 [45:12<1:44:39, 216.31it/s]

finished frames 1850400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▊        | 308540/1666666 [45:13<1:43:51, 217.96it/s]

finished frames 1851000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▊        | 308628/1666666 [45:13<1:44:09, 217.31it/s]

finished frames 1851600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▊        | 308738/1666666 [45:14<1:44:55, 215.71it/s]

finished frames 1852200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▊        | 308826/1666666 [45:14<1:44:34, 216.40it/s]

finished frames 1852800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▊        | 308936/1666666 [45:15<1:44:17, 216.99it/s]

finished frames 1853400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▊        | 309024/1666666 [45:15<1:46:34, 212.31it/s]

finished frames 1854000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▊        | 309134/1666666 [45:16<1:45:04, 215.32it/s]

finished frames 1854600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▊        | 309244/1666666 [45:16<1:44:12, 217.10it/s]

finished frames 1855200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▊        | 309332/1666666 [45:16<1:44:22, 216.75it/s]

finished frames 1855800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▊        | 309442/1666666 [45:17<1:44:27, 216.53it/s]

finished frames 1856400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▊        | 309530/1666666 [45:17<1:44:25, 216.60it/s]

finished frames 1857000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▊        | 309640/1666666 [45:18<1:44:20, 216.78it/s]

finished frames 1857600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▊        | 309728/1666666 [45:18<1:44:07, 217.19it/s]

finished frames 1858200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▊        | 309838/1666666 [45:19<1:44:13, 216.98it/s]

finished frames 1858800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▊        | 309926/1666666 [45:19<1:44:12, 217.00it/s]

finished frames 1859400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▊        | 310036/1666666 [45:20<1:46:43, 211.86it/s]

finished frames 1860000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▊        | 310124/1666666 [45:20<1:45:02, 215.23it/s]

finished frames 1860600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▊        | 310234/1666666 [45:21<1:44:20, 216.66it/s]

finished frames 1861200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▊        | 310344/1666666 [45:21<1:44:11, 216.95it/s]

finished frames 1861800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▊        | 310432/1666666 [45:22<1:44:20, 216.64it/s]

finished frames 1862400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▊        | 310542/1666666 [45:22<1:44:13, 216.86it/s]

finished frames 1863000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▊        | 310630/1666666 [45:22<1:44:11, 216.91it/s]

finished frames 1863600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▊        | 310740/1666666 [45:23<1:44:24, 216.46it/s]

finished frames 1864200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▊        | 310828/1666666 [45:23<1:44:07, 217.01it/s]

finished frames 1864800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▊        | 310938/1666666 [45:24<1:44:07, 217.00it/s]

finished frames 1865400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▊        | 311026/1666666 [45:24<1:46:38, 211.86it/s]

finished frames 1866000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▊        | 311136/1666666 [45:25<1:44:38, 215.91it/s]

finished frames 1866600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▊        | 311224/1666666 [45:25<1:44:24, 216.37it/s]

finished frames 1867200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▊        | 311334/1666666 [45:26<1:44:24, 216.34it/s]

finished frames 1867800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▊        | 311444/1666666 [45:26<1:44:01, 217.13it/s]

finished frames 1868400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▊        | 311532/1666666 [45:27<1:44:08, 216.86it/s]

finished frames 1869000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▊        | 311642/1666666 [45:27<1:46:47, 211.48it/s]

finished frames 1869600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▊        | 311730/1666666 [45:28<1:44:46, 215.52it/s]

finished frames 1870200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▊        | 311840/1666666 [45:28<1:44:18, 216.47it/s]

finished frames 1870800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▊        | 311928/1666666 [45:29<1:44:15, 216.57it/s]

finished frames 1871400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▊        | 312038/1666666 [45:29<1:48:08, 208.76it/s]

finished frames 1872000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▊        | 312123/1666666 [45:29<1:48:02, 208.95it/s]

finished frames 1872600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▊        | 312230/1666666 [45:30<1:47:49, 209.36it/s]

finished frames 1873200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▊        | 312336/1666666 [45:30<1:47:31, 209.94it/s]

finished frames 1873800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▊        | 312422/1666666 [45:31<1:47:46, 209.44it/s]

finished frames 1874400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 312528/1666666 [45:31<1:48:02, 208.90it/s]

finished frames 1875000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 312635/1666666 [45:32<1:47:34, 209.79it/s]

finished frames 1875600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 312743/1666666 [45:32<1:47:24, 210.09it/s]

finished frames 1876200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 312830/1666666 [45:33<1:47:35, 209.72it/s]

finished frames 1876800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 312938/1666666 [45:33<1:47:28, 209.92it/s]

finished frames 1877400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 313022/1666666 [45:34<1:50:13, 204.67it/s]

finished frames 1878000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 313131/1666666 [45:34<1:47:07, 210.60it/s]

finished frames 1878600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 313241/1666666 [45:35<1:46:55, 210.96it/s]

finished frames 1879200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 313329/1666666 [45:35<1:46:36, 211.58it/s]

finished frames 1879800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 313439/1666666 [45:36<1:46:29, 211.78it/s]

finished frames 1880400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 313527/1666666 [45:36<1:46:44, 211.28it/s]

finished frames 1881000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 313637/1666666 [45:37<1:46:51, 211.04it/s]

finished frames 1881600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 313725/1666666 [45:37<1:46:50, 211.04it/s]

finished frames 1882200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 313834/1666666 [45:38<1:47:38, 209.46it/s]

finished frames 1882800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 313922/1666666 [45:38<1:45:47, 213.12it/s]

finished frames 1883400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 314031/1666666 [45:39<1:51:00, 203.08it/s]

finished frames 1884000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 314141/1666666 [45:39<1:46:12, 212.24it/s]

finished frames 1884600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 314229/1666666 [45:40<1:46:29, 211.67it/s]

finished frames 1885200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 314339/1666666 [45:40<1:46:32, 211.55it/s]

finished frames 1885800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 314427/1666666 [45:40<1:46:25, 211.78it/s]

finished frames 1886400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 314537/1666666 [45:41<1:46:15, 212.07it/s]

finished frames 1887000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 314625/1666666 [45:41<1:46:11, 212.20it/s]

finished frames 1887600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 314735/1666666 [45:42<1:46:09, 212.24it/s]

finished frames 1888200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 314823/1666666 [45:42<1:46:11, 212.18it/s]

finished frames 1888800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 314933/1666666 [45:43<1:47:09, 210.23it/s]

finished frames 1889400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 315040/1666666 [45:43<1:50:10, 204.48it/s]

finished frames 1890000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 315124/1666666 [45:44<1:48:51, 206.93it/s]

finished frames 1890600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 315231/1666666 [45:44<1:47:55, 208.71it/s]

finished frames 1891200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 315339/1666666 [45:45<1:46:43, 211.04it/s]

finished frames 1891800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 315427/1666666 [45:45<1:46:14, 211.98it/s]

finished frames 1892400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 315537/1666666 [45:46<1:46:00, 212.41it/s]

finished frames 1893000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 315625/1666666 [45:46<1:46:06, 212.20it/s]

finished frames 1893600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 315735/1666666 [45:47<1:46:38, 211.14it/s]

finished frames 1894200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 315823/1666666 [45:47<1:46:12, 211.99it/s]

finished frames 1894800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 315933/1666666 [45:48<1:46:14, 211.89it/s]

finished frames 1895400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 316021/1666666 [45:48<1:49:21, 205.86it/s]

finished frames 1896000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 316130/1666666 [45:49<1:46:54, 210.53it/s]

finished frames 1896600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 316238/1666666 [45:49<1:51:04, 202.64it/s]

finished frames 1897200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 316324/1666666 [45:50<1:54:21, 196.80it/s]

finished frames 1897800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 316433/1666666 [45:50<1:47:44, 208.86it/s]

finished frames 1898400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 316542/1666666 [45:51<1:46:30, 211.27it/s]

finished frames 1899000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 316630/1666666 [45:51<1:46:03, 212.15it/s]

finished frames 1899600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 316740/1666666 [45:52<1:46:39, 210.96it/s]

finished frames 1900200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 316827/1666666 [45:52<1:46:57, 210.34it/s]

finished frames 1900800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 316936/1666666 [45:52<1:46:49, 210.57it/s]

finished frames 1901400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 317022/1666666 [45:53<1:49:44, 204.98it/s]

finished frames 1902000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 317129/1666666 [45:53<1:46:19, 211.55it/s]

finished frames 1902600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 317240/1666666 [45:54<1:45:23, 213.41it/s]

finished frames 1903200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 317328/1666666 [45:54<1:44:32, 215.11it/s]

finished frames 1903800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 317438/1666666 [45:55<1:47:15, 209.66it/s]

finished frames 1904400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 317525/1666666 [45:55<1:46:32, 211.04it/s]

finished frames 1905000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 317632/1666666 [45:56<1:48:45, 206.75it/s]

finished frames 1905600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 317739/1666666 [45:56<1:46:43, 210.64it/s]

finished frames 1906200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 317827/1666666 [45:57<1:45:08, 213.83it/s]

finished frames 1906800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 317937/1666666 [45:57<1:46:02, 211.98it/s]

finished frames 1907400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 318025/1666666 [45:58<1:47:42, 208.69it/s]

finished frames 1908000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 318135/1666666 [45:58<1:45:34, 212.90it/s]

finished frames 1908600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 318223/1666666 [45:59<1:45:17, 213.44it/s]

finished frames 1909200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 318332/1666666 [45:59<1:46:12, 211.58it/s]

finished frames 1909800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 318442/1666666 [46:00<1:44:52, 214.27it/s]

finished frames 1910400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 318530/1666666 [46:00<1:44:39, 214.68it/s]

finished frames 1911000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 318640/1666666 [46:01<1:46:16, 211.40it/s]

finished frames 1911600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 318728/1666666 [46:01<1:44:18, 215.37it/s]

finished frames 1912200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 318838/1666666 [46:01<1:44:11, 215.61it/s]

finished frames 1912800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 318926/1666666 [46:02<1:43:53, 216.19it/s]

finished frames 1913400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 319036/1666666 [46:02<1:45:56, 212.02it/s]

finished frames 1914000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 319124/1666666 [46:03<1:44:15, 215.43it/s]

finished frames 1914600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 319234/1666666 [46:03<1:43:43, 216.50it/s]

finished frames 1915200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 319344/1666666 [46:04<1:43:42, 216.53it/s]

finished frames 1915800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 319432/1666666 [46:04<1:43:50, 216.23it/s]

finished frames 1916400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 319542/1666666 [46:05<1:43:34, 216.77it/s]

finished frames 1917000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 319630/1666666 [46:05<1:43:40, 216.56it/s]

finished frames 1917600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 319740/1666666 [46:06<1:43:36, 216.66it/s]

finished frames 1918200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 319828/1666666 [46:06<1:44:05, 215.66it/s]

finished frames 1918800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 319938/1666666 [46:07<1:44:04, 215.68it/s]

finished frames 1919400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 320026/1666666 [46:07<1:46:48, 210.13it/s]

finished frames 1920000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 320136/1666666 [46:08<1:44:20, 215.08it/s]

finished frames 1920600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 320224/1666666 [46:08<1:43:50, 216.12it/s]

finished frames 1921200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 320334/1666666 [46:08<1:44:17, 215.14it/s]

finished frames 1921800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 320444/1666666 [46:09<1:43:55, 215.88it/s]

finished frames 1922400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 320532/1666666 [46:09<1:43:59, 215.73it/s]

finished frames 1923000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 320642/1666666 [46:10<1:43:53, 215.93it/s]

finished frames 1923600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 320730/1666666 [46:10<1:43:58, 215.74it/s]

finished frames 1924200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 320840/1666666 [46:11<1:46:02, 211.53it/s]

finished frames 1924800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 320928/1666666 [46:11<1:45:57, 211.69it/s]

finished frames 1925400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 321036/1666666 [46:12<1:51:37, 200.92it/s]

finished frames 1926000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 321123/1666666 [46:12<1:47:07, 209.33it/s]

finished frames 1926600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 321233/1666666 [46:13<1:45:54, 211.74it/s]

finished frames 1927200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 321343/1666666 [46:13<1:45:29, 212.55it/s]

finished frames 1927800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 321431/1666666 [46:14<1:45:32, 212.43it/s]

finished frames 1928400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 321541/1666666 [46:14<1:45:30, 212.48it/s]

finished frames 1929000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 321629/1666666 [46:15<1:45:47, 211.91it/s]

finished frames 1929600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 321739/1666666 [46:15<1:45:38, 212.18it/s]

finished frames 1930200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 321826/1666666 [46:16<1:46:45, 209.95it/s]

finished frames 1930800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 321933/1666666 [46:16<1:46:35, 210.26it/s]

finished frames 1931400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 322040/1666666 [46:17<1:49:24, 204.85it/s]

finished frames 1932000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 322124/1666666 [46:17<1:48:02, 207.40it/s]

finished frames 1932600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 322230/1666666 [46:17<1:47:18, 208.83it/s]

finished frames 1933200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 322335/1666666 [46:18<1:47:08, 209.12it/s]

finished frames 1933800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 322442/1666666 [46:18<1:46:56, 209.51it/s]

finished frames 1934400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 322527/1666666 [46:19<1:47:01, 209.31it/s]

finished frames 1935000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 322633/1666666 [46:19<1:47:07, 209.12it/s]

finished frames 1935600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 322739/1666666 [46:20<1:46:57, 209.42it/s]

finished frames 1936200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 322824/1666666 [46:20<1:47:01, 209.26it/s]

finished frames 1936800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 322930/1666666 [46:21<1:46:55, 209.45it/s]

finished frames 1937400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 323035/1666666 [46:21<1:49:16, 204.92it/s]

finished frames 1938000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 323141/1666666 [46:22<1:47:37, 208.07it/s]

finished frames 1938600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 323227/1666666 [46:22<1:46:50, 209.56it/s]

finished frames 1939200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 323333/1666666 [46:23<1:52:28, 199.07it/s]

finished frames 1939800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 323419/1666666 [46:23<1:48:13, 206.87it/s]

finished frames 1940400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 323525/1666666 [46:24<1:49:40, 204.12it/s]

finished frames 1941000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 323630/1666666 [46:24<1:47:44, 207.75it/s]

finished frames 1941600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 323736/1666666 [46:25<1:47:10, 208.85it/s]

finished frames 1942200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 323841/1666666 [46:25<1:47:22, 208.42it/s]

finished frames 1942800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 323925/1666666 [46:26<1:47:27, 208.26it/s]

finished frames 1943400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 324030/1666666 [46:26<1:50:07, 203.19it/s]

finished frames 1944000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 324135/1666666 [46:27<1:47:52, 207.43it/s]

finished frames 1944600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 324242/1666666 [46:27<1:46:55, 209.24it/s]

finished frames 1945200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 324328/1666666 [46:28<1:46:51, 209.36it/s]

finished frames 1945800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 324433/1666666 [46:28<1:47:15, 208.56it/s]

finished frames 1946400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 324540/1666666 [46:29<1:46:48, 209.44it/s]

finished frames 1947000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 324626/1666666 [46:29<1:46:48, 209.41it/s]

finished frames 1947600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 324734/1666666 [46:30<1:46:35, 209.84it/s]

finished frames 1948200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 324841/1666666 [46:30<1:46:33, 209.87it/s]

finished frames 1948800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 19%|█▉        | 324925/1666666 [46:30<1:47:02, 208.93it/s]

finished frames 1949400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 325030/1666666 [46:31<1:49:21, 204.48it/s]

finished frames 1950000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 325136/1666666 [46:31<1:47:37, 207.76it/s]

finished frames 1950600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 325242/1666666 [46:32<1:46:38, 209.63it/s]

finished frames 1951200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 325328/1666666 [46:32<1:46:43, 209.47it/s]

finished frames 1951800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 325435/1666666 [46:33<1:46:36, 209.67it/s]

finished frames 1952400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 325542/1666666 [46:33<1:46:33, 209.76it/s]

finished frames 1953000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 325629/1666666 [46:34<1:46:33, 209.76it/s]

finished frames 1953600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 325735/1666666 [46:34<1:50:04, 203.02it/s]

finished frames 1954200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 325841/1666666 [46:35<1:50:18, 202.58it/s]

finished frames 1954800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 325926/1666666 [46:35<1:47:28, 207.92it/s]

finished frames 1955400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 326032/1666666 [46:36<1:49:23, 204.25it/s]

finished frames 1956000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 326137/1666666 [46:36<1:47:46, 207.29it/s]

finished frames 1956600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 326222/1666666 [46:37<1:47:03, 208.67it/s]

finished frames 1957200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 326331/1666666 [46:37<1:46:27, 209.83it/s]

finished frames 1957800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 326440/1666666 [46:38<1:46:17, 210.16it/s]

finished frames 1958400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 326528/1666666 [46:38<1:46:00, 210.70it/s]

finished frames 1959000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 326636/1666666 [46:39<1:46:36, 209.51it/s]

finished frames 1959600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 326722/1666666 [46:39<1:46:19, 210.04it/s]

finished frames 1960200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 326832/1666666 [46:40<1:46:09, 210.36it/s]

finished frames 1960800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 326941/1666666 [46:40<1:45:59, 210.66it/s]

finished frames 1961400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 327029/1666666 [46:41<1:46:49, 208.99it/s]

finished frames 1962000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 327139/1666666 [46:41<1:44:15, 214.15it/s]

finished frames 1962600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 327227/1666666 [46:42<1:44:20, 213.95it/s]

finished frames 1963200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 327337/1666666 [46:42<1:44:19, 213.95it/s]

finished frames 1963800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 327425/1666666 [46:42<1:44:14, 214.12it/s]

finished frames 1964400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 327535/1666666 [46:43<1:44:23, 213.81it/s]

finished frames 1965000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 327623/1666666 [46:43<1:44:25, 213.73it/s]

finished frames 1965600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 327733/1666666 [46:44<1:44:11, 214.16it/s]

finished frames 1966200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 327843/1666666 [46:44<1:43:37, 215.33it/s]

finished frames 1966800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 327931/1666666 [46:45<1:44:05, 214.35it/s]

finished frames 1967400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 328041/1666666 [46:45<1:46:03, 210.36it/s]

finished frames 1968000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 328129/1666666 [46:46<1:44:46, 212.92it/s]

finished frames 1968600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 328239/1666666 [46:46<1:47:48, 206.92it/s]

finished frames 1969200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 328327/1666666 [46:47<1:45:13, 211.98it/s]

finished frames 1969800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 328437/1666666 [46:47<1:44:12, 214.05it/s]

finished frames 1970400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 328525/1666666 [46:48<1:44:07, 214.20it/s]

finished frames 1971000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 328635/1666666 [46:48<1:44:01, 214.39it/s]

finished frames 1971600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 328723/1666666 [46:49<1:44:05, 214.23it/s]

finished frames 1972200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 328833/1666666 [46:49<1:43:48, 214.79it/s]

finished frames 1972800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 328943/1666666 [46:50<1:43:57, 214.45it/s]

finished frames 1973400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 329031/1666666 [46:50<1:46:38, 209.06it/s]

finished frames 1974000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 329140/1666666 [46:51<1:44:30, 213.29it/s]

finished frames 1974600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 329228/1666666 [46:51<1:44:23, 213.53it/s]

finished frames 1975200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 329338/1666666 [46:51<1:43:46, 214.80it/s]

finished frames 1975800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 329426/1666666 [46:52<1:43:46, 214.78it/s]

finished frames 1976400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 329536/1666666 [46:52<1:43:20, 215.65it/s]

finished frames 1977000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 329624/1666666 [46:53<1:43:46, 214.73it/s]

finished frames 1977600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 329734/1666666 [46:53<1:43:44, 214.78it/s]

finished frames 1978200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 329844/1666666 [46:54<1:43:45, 214.74it/s]

finished frames 1978800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 329932/1666666 [46:54<1:43:48, 214.60it/s]

finished frames 1979400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 330020/1666666 [46:55<1:46:49, 208.53it/s]

finished frames 1980000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 330129/1666666 [46:55<1:44:41, 212.78it/s]

finished frames 1980600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 330239/1666666 [46:56<1:44:08, 213.88it/s]

finished frames 1981200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 330327/1666666 [46:56<1:43:56, 214.27it/s]

finished frames 1981800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 330437/1666666 [46:57<1:43:53, 214.37it/s]

finished frames 1982400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 330525/1666666 [46:57<1:43:56, 214.24it/s]

finished frames 1983000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 330635/1666666 [46:58<1:47:49, 206.51it/s]

finished frames 1983600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 330744/1666666 [46:58<1:44:16, 213.51it/s]

finished frames 1984200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 330832/1666666 [46:59<1:43:43, 214.64it/s]

finished frames 1984800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 330942/1666666 [46:59<1:43:14, 215.64it/s]

finished frames 1985400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 331030/1666666 [46:59<1:45:52, 210.27it/s]

finished frames 1986000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 331140/1666666 [47:00<1:43:39, 214.74it/s]

finished frames 1986600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 331228/1666666 [47:00<1:43:19, 215.41it/s]

finished frames 1987200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 331338/1666666 [47:01<1:43:12, 215.64it/s]

finished frames 1987800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 331426/1666666 [47:01<1:43:14, 215.56it/s]

finished frames 1988400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 331536/1666666 [47:02<1:43:15, 215.49it/s]

finished frames 1989000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 331624/1666666 [47:02<1:43:26, 215.09it/s]

finished frames 1989600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 331734/1666666 [47:03<1:43:06, 215.79it/s]

finished frames 1990200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 331844/1666666 [47:03<1:43:04, 215.82it/s]

finished frames 1990800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 331932/1666666 [47:04<1:43:03, 215.87it/s]

finished frames 1991400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 332020/1666666 [47:04<1:46:19, 209.21it/s]

finished frames 1992000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 332130/1666666 [47:05<1:43:45, 214.36it/s]

finished frames 1992600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 332240/1666666 [47:05<1:43:08, 215.62it/s]

finished frames 1993200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 332328/1666666 [47:06<1:43:32, 214.78it/s]

finished frames 1993800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 332438/1666666 [47:06<1:45:16, 211.23it/s]

finished frames 1994400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 332526/1666666 [47:06<1:44:39, 212.48it/s]

finished frames 1995000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 332636/1666666 [47:07<1:43:36, 214.60it/s]

finished frames 1995600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 332724/1666666 [47:07<1:43:26, 214.94it/s]

finished frames 1996200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 332834/1666666 [47:08<1:52:05, 198.33it/s]

finished frames 1996800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 332944/1666666 [47:08<1:43:58, 213.80it/s]

finished frames 1997400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 333032/1666666 [47:09<1:45:33, 210.56it/s]

finished frames 1998000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 333142/1666666 [47:09<1:44:02, 213.61it/s]

finished frames 1998600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|█▉        | 333230/1666666 [47:10<1:43:53, 213.91it/s]

finished frames 1999200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 333340/1666666 [47:10<1:43:45, 214.15it/s]

finished frames 1999800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 333428/1666666 [47:11<1:43:50, 213.98it/s]

finished frames 2000400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 333538/1666666 [47:11<1:43:36, 214.45it/s]

finished frames 2001000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 333626/1666666 [47:12<1:43:44, 214.15it/s]

finished frames 2001600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 333736/1666666 [47:12<1:43:37, 214.39it/s]

finished frames 2002200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 333824/1666666 [47:13<1:43:30, 214.61it/s]

finished frames 2002800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 333934/1666666 [47:13<1:43:41, 214.21it/s]

finished frames 2003400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 334022/1666666 [47:13<1:46:44, 208.07it/s]

finished frames 2004000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 334132/1666666 [47:14<1:44:06, 213.34it/s]

finished frames 2004600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 334242/1666666 [47:15<1:43:40, 214.21it/s]

finished frames 2005200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 334330/1666666 [47:15<1:43:49, 213.88it/s]

finished frames 2005800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 334440/1666666 [47:15<1:44:42, 212.06it/s]

finished frames 2006400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 334528/1666666 [47:16<1:44:43, 212.01it/s]

finished frames 2007000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 334638/1666666 [47:16<1:44:55, 211.58it/s]

finished frames 2007600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 334726/1666666 [47:17<1:44:37, 212.19it/s]

finished frames 2008200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 334836/1666666 [47:17<1:44:46, 211.85it/s]

finished frames 2008800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 334924/1666666 [47:18<1:44:36, 212.19it/s]

finished frames 2009400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 335034/1666666 [47:18<1:47:02, 207.33it/s]

finished frames 2010000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 335143/1666666 [47:19<1:45:17, 210.75it/s]

finished frames 2010600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 335231/1666666 [47:19<1:44:49, 211.70it/s]

finished frames 2011200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 335319/1666666 [47:20<1:45:01, 211.28it/s]

finished frames 2011800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 335429/1666666 [47:20<1:48:36, 204.30it/s]

finished frames 2012400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 335538/1666666 [47:21<1:45:25, 210.44it/s]

finished frames 2013000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 335626/1666666 [47:21<1:44:44, 211.81it/s]

finished frames 2013600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 335736/1666666 [47:22<1:44:42, 211.85it/s]

finished frames 2014200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 335824/1666666 [47:22<1:44:41, 211.87it/s]

finished frames 2014800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 335934/1666666 [47:23<1:44:48, 211.61it/s]

finished frames 2015400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 336022/1666666 [47:23<1:47:39, 206.00it/s]

finished frames 2016000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 336131/1666666 [47:24<1:45:36, 209.97it/s]

finished frames 2016600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 336241/1666666 [47:24<1:44:33, 212.06it/s]

finished frames 2017200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 336329/1666666 [47:24<1:44:38, 211.89it/s]

finished frames 2017800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 336439/1666666 [47:25<1:44:30, 212.15it/s]

finished frames 2018400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 336527/1666666 [47:25<1:44:39, 211.82it/s]

finished frames 2019000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 336637/1666666 [47:26<1:44:40, 211.77it/s]

finished frames 2019600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 336725/1666666 [47:26<1:44:41, 211.73it/s]

finished frames 2020200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 336835/1666666 [47:27<1:44:33, 211.96it/s]

finished frames 2020800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 336923/1666666 [47:27<1:44:34, 211.93it/s]

finished frames 2021400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 337032/1666666 [47:28<1:47:14, 206.63it/s]

finished frames 2022000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 337141/1666666 [47:28<1:44:41, 211.65it/s]

finished frames 2022600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 337229/1666666 [47:29<1:45:17, 210.42it/s]

finished frames 2023200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 337338/1666666 [47:29<1:45:14, 210.51it/s]

finished frames 2023800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 337425/1666666 [47:30<1:45:32, 209.92it/s]

finished frames 2024400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 337532/1666666 [47:30<1:45:21, 210.26it/s]

finished frames 2025000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 337638/1666666 [47:31<1:52:14, 197.33it/s]

finished frames 2025600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 337743/1666666 [47:31<1:50:09, 201.05it/s]

finished frames 2026200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 337830/1666666 [47:32<1:46:34, 207.82it/s]

finished frames 2026800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 337938/1666666 [47:32<1:45:40, 209.56it/s]

finished frames 2027400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 338023/1666666 [47:33<1:48:09, 204.73it/s]

finished frames 2028000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 338130/1666666 [47:33<1:46:02, 208.81it/s]

finished frames 2028600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 338236/1666666 [47:34<1:45:41, 209.48it/s]

finished frames 2029200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 338342/1666666 [47:34<1:45:33, 209.75it/s]

finished frames 2029800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 338426/1666666 [47:35<1:45:58, 208.89it/s]

finished frames 2030400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 338534/1666666 [47:35<1:45:18, 210.21it/s]

finished frames 2031000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 338643/1666666 [47:36<1:45:06, 210.57it/s]

finished frames 2031600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 338729/1666666 [47:36<1:45:34, 209.63it/s]

finished frames 2032200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 338836/1666666 [47:37<1:45:34, 209.61it/s]

finished frames 2032800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 338941/1666666 [47:37<1:45:36, 209.52it/s]

finished frames 2033400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 339025/1666666 [47:37<1:48:13, 204.47it/s]

finished frames 2034000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 339132/1666666 [47:38<1:45:52, 208.98it/s]

finished frames 2034600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 339240/1666666 [47:38<1:45:35, 209.51it/s]

finished frames 2035200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 339327/1666666 [47:39<1:45:21, 209.98it/s]

finished frames 2035800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 339432/1666666 [47:39<1:45:46, 209.13it/s]

finished frames 2036400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 339539/1666666 [47:40<1:45:42, 209.26it/s]

finished frames 2037000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 339623/1666666 [47:40<1:46:16, 208.11it/s]

finished frames 2037600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 339728/1666666 [47:41<1:46:07, 208.40it/s]

finished frames 2038200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 339833/1666666 [47:41<1:46:11, 208.26it/s]

finished frames 2038800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 339917/1666666 [47:42<1:55:53, 190.81it/s]

finished frames 2039400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 340022/1666666 [47:42<1:59:55, 184.36it/s]

finished frames 2040000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 340131/1666666 [47:43<1:46:17, 207.99it/s]

finished frames 2040600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 340240/1666666 [47:43<1:43:26, 213.73it/s]

finished frames 2041200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 340328/1666666 [47:44<1:42:51, 214.91it/s]

finished frames 2041800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 340438/1666666 [47:44<1:42:57, 214.68it/s]

finished frames 2042400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 340526/1666666 [47:45<1:42:50, 214.91it/s]

finished frames 2043000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 340636/1666666 [47:45<1:43:08, 214.28it/s]

finished frames 2043600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 340724/1666666 [47:46<1:42:55, 214.72it/s]

finished frames 2044200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 340834/1666666 [47:46<1:42:47, 214.96it/s]

finished frames 2044800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 340922/1666666 [47:47<1:42:58, 214.56it/s]

finished frames 2045400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 341032/1666666 [47:47<1:45:14, 209.93it/s]

finished frames 2046000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 341142/1666666 [47:48<1:43:00, 214.46it/s]

finished frames 2046600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 341230/1666666 [47:48<1:42:22, 215.78it/s]

finished frames 2047200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 341340/1666666 [47:48<1:42:36, 215.27it/s]

finished frames 2047800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 341428/1666666 [47:49<1:42:39, 215.17it/s]

finished frames 2048400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 341538/1666666 [47:49<1:42:49, 214.77it/s]

finished frames 2049000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 20%|██        | 341626/1666666 [47:50<1:43:06, 214.18it/s]

finished frames 2049600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 341736/1666666 [47:50<1:42:37, 215.19it/s]

finished frames 2050200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 341824/1666666 [47:51<1:42:52, 214.62it/s]

finished frames 2050800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 341934/1666666 [47:51<1:42:28, 215.46it/s]

finished frames 2051400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 342022/1666666 [47:52<1:45:47, 208.69it/s]

finished frames 2052000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 342132/1666666 [47:52<1:43:08, 214.03it/s]

finished frames 2052600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 342242/1666666 [47:53<1:42:14, 215.88it/s]

finished frames 2053200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 342329/1666666 [47:53<1:51:40, 197.65it/s]

finished frames 2053800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 342439/1666666 [47:54<1:45:26, 209.31it/s]

finished frames 2054400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 342526/1666666 [47:54<1:44:04, 212.04it/s]

finished frames 2055000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 342634/1666666 [47:55<1:45:42, 208.77it/s]

finished frames 2055600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 342739/1666666 [47:55<1:48:59, 202.46it/s]

finished frames 2056200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 342825/1666666 [47:56<1:46:08, 207.88it/s]

finished frames 2056800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 342932/1666666 [47:56<1:45:39, 208.82it/s]

finished frames 2057400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 343041/1666666 [47:57<1:44:54, 210.27it/s]

finished frames 2058000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 343129/1666666 [47:57<1:44:26, 211.21it/s]

finished frames 2058600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 343239/1666666 [47:57<1:42:31, 215.13it/s]

finished frames 2059200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 343327/1666666 [47:58<1:41:46, 216.71it/s]

finished frames 2059800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 343437/1666666 [47:58<1:41:54, 216.41it/s]

finished frames 2060400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 343525/1666666 [47:59<1:41:41, 216.84it/s]

finished frames 2061000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 343635/1666666 [47:59<1:41:46, 216.67it/s]

finished frames 2061600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 343723/1666666 [48:00<1:41:34, 217.07it/s]

finished frames 2062200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 343833/1666666 [48:00<1:41:28, 217.26it/s]

finished frames 2062800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 343943/1666666 [48:01<1:41:40, 216.83it/s]

finished frames 2063400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 344031/1666666 [48:01<1:44:05, 211.78it/s]

finished frames 2064000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 344141/1666666 [48:02<1:42:09, 215.78it/s]

finished frames 2064600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 344229/1666666 [48:02<1:41:42, 216.70it/s]

finished frames 2065200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 344339/1666666 [48:03<1:41:21, 217.43it/s]

finished frames 2065800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 344427/1666666 [48:03<1:41:30, 217.11it/s]

finished frames 2066400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 344537/1666666 [48:03<1:41:17, 217.53it/s]

finished frames 2067000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 344625/1666666 [48:04<1:41:32, 217.01it/s]

finished frames 2067600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 344735/1666666 [48:04<1:47:31, 204.90it/s]

finished frames 2068200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 344823/1666666 [48:05<1:52:22, 196.04it/s]

finished frames 2068800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 344933/1666666 [48:05<1:43:40, 212.47it/s]

finished frames 2069400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 345021/1666666 [48:06<1:45:32, 208.72it/s]

finished frames 2070000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 345131/1666666 [48:06<1:42:52, 214.12it/s]

finished frames 2070600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 345241/1666666 [48:07<1:42:23, 215.09it/s]

finished frames 2071200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 345329/1666666 [48:07<1:42:18, 215.26it/s]

finished frames 2071800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 345439/1666666 [48:08<1:42:16, 215.29it/s]

finished frames 2072400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 345527/1666666 [48:08<1:42:19, 215.20it/s]

finished frames 2073000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 345637/1666666 [48:09<1:42:06, 215.62it/s]

finished frames 2073600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 345725/1666666 [48:09<1:42:18, 215.19it/s]

finished frames 2074200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 345835/1666666 [48:10<1:41:58, 215.88it/s]

finished frames 2074800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 345923/1666666 [48:10<1:41:45, 216.31it/s]

finished frames 2075400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 346033/1666666 [48:11<1:43:44, 212.15it/s]

finished frames 2076000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 346143/1666666 [48:11<1:41:56, 215.90it/s]

finished frames 2076600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 346231/1666666 [48:11<1:41:41, 216.39it/s]

finished frames 2077200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 346342/1666666 [48:12<1:41:14, 217.36it/s]

finished frames 2077800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 346430/1666666 [48:12<1:41:30, 216.77it/s]

finished frames 2078400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 346540/1666666 [48:13<1:41:19, 217.13it/s]

finished frames 2079000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 346628/1666666 [48:13<1:41:31, 216.71it/s]

finished frames 2079600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 346738/1666666 [48:14<1:41:31, 216.68it/s]

finished frames 2080200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 346826/1666666 [48:14<1:41:32, 216.62it/s]

finished frames 2080800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 346936/1666666 [48:15<1:41:28, 216.77it/s]

finished frames 2081400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 347024/1666666 [48:15<1:43:44, 212.00it/s]

finished frames 2082000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 347134/1666666 [48:16<1:50:45, 198.55it/s]

finished frames 2082600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 347222/1666666 [48:16<1:43:37, 212.22it/s]

finished frames 2083200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 347332/1666666 [48:17<1:43:54, 211.62it/s]

finished frames 2083800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 347442/1666666 [48:17<1:41:48, 215.96it/s]

finished frames 2084400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 347530/1666666 [48:18<1:41:35, 216.42it/s]

finished frames 2085000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 347640/1666666 [48:18<1:41:40, 216.21it/s]

finished frames 2085600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 347728/1666666 [48:18<1:41:43, 216.10it/s]

finished frames 2086200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 347838/1666666 [48:19<1:41:15, 217.09it/s]

finished frames 2086800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 347926/1666666 [48:19<1:41:29, 216.55it/s]

finished frames 2087400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 348036/1666666 [48:20<1:43:28, 212.40it/s]

finished frames 2088000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 348124/1666666 [48:20<1:42:33, 214.29it/s]

finished frames 2088600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 348234/1666666 [48:21<1:41:38, 216.18it/s]

finished frames 2089200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 348344/1666666 [48:21<1:41:25, 216.63it/s]

finished frames 2089800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 348432/1666666 [48:22<1:41:35, 216.26it/s]

finished frames 2090400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 348542/1666666 [48:22<1:41:21, 216.75it/s]

finished frames 2091000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 348630/1666666 [48:23<1:41:14, 216.98it/s]

finished frames 2091600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 348740/1666666 [48:23<1:41:16, 216.90it/s]

finished frames 2092200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 348828/1666666 [48:24<1:41:53, 215.55it/s]

finished frames 2092800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 348938/1666666 [48:24<1:41:54, 215.49it/s]

finished frames 2093400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 349026/1666666 [48:24<1:44:02, 211.09it/s]

finished frames 2094000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 349136/1666666 [48:25<1:42:15, 214.75it/s]

finished frames 2094600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 349224/1666666 [48:25<1:41:59, 215.30it/s]

finished frames 2095200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 349334/1666666 [48:26<1:41:39, 215.99it/s]

finished frames 2095800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 349444/1666666 [48:26<1:41:21, 216.58it/s]

finished frames 2096400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 349532/1666666 [48:27<1:41:19, 216.66it/s]

finished frames 2097000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 349642/1666666 [48:27<1:41:21, 216.55it/s]

finished frames 2097600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 349730/1666666 [48:28<1:45:50, 207.36it/s]

finished frames 2098200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 349839/1666666 [48:28<1:43:29, 212.08it/s]

finished frames 2098800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 349927/1666666 [48:29<1:42:58, 213.13it/s]

finished frames 2099400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 350037/1666666 [48:29<1:44:59, 209.02it/s]

finished frames 2100000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 350125/1666666 [48:30<1:42:26, 214.21it/s]

finished frames 2100600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 350235/1666666 [48:30<1:43:52, 211.22it/s]

finished frames 2101200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 350343/1666666 [48:31<1:44:18, 210.32it/s]

finished frames 2101800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 350431/1666666 [48:31<1:44:09, 210.62it/s]

finished frames 2102400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 350540/1666666 [48:32<1:44:09, 210.60it/s]

finished frames 2103000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 350627/1666666 [48:32<1:44:12, 210.48it/s]

finished frames 2103600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 350736/1666666 [48:33<1:44:03, 210.75it/s]

finished frames 2104200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 350823/1666666 [48:33<1:44:03, 210.75it/s]

finished frames 2104800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 350933/1666666 [48:33<1:44:10, 210.49it/s]

finished frames 2105400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 351040/1666666 [48:34<1:47:06, 204.72it/s]

finished frames 2106000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 351126/1666666 [48:34<1:45:07, 208.57it/s]

finished frames 2106600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 351234/1666666 [48:35<1:44:05, 210.62it/s]

finished frames 2107200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 351322/1666666 [48:35<1:43:59, 210.82it/s]

finished frames 2107800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 351431/1666666 [48:36<1:44:24, 209.96it/s]

finished frames 2108400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 351539/1666666 [48:36<1:44:08, 210.47it/s]

finished frames 2109000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 351627/1666666 [48:37<1:43:53, 210.98it/s]

finished frames 2109600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 351737/1666666 [48:37<1:43:52, 210.98it/s]

finished frames 2110200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 351824/1666666 [48:38<1:44:17, 210.12it/s]

finished frames 2110800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 351931/1666666 [48:38<1:51:00, 197.38it/s]

finished frames 2111400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 352038/1666666 [48:39<1:47:38, 203.54it/s]

finished frames 2112000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 352143/1666666 [48:39<1:45:30, 207.64it/s]

finished frames 2112600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 352229/1666666 [48:40<1:44:26, 209.75it/s]

finished frames 2113200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 352338/1666666 [48:40<1:43:47, 211.05it/s]

finished frames 2113800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 352423/1666666 [48:41<1:44:35, 209.41it/s]

finished frames 2114400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 352528/1666666 [48:41<1:44:57, 208.67it/s]

finished frames 2115000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 352636/1666666 [48:42<1:44:17, 209.98it/s]

finished frames 2115600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 352723/1666666 [48:42<1:43:48, 210.95it/s]

finished frames 2116200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 352831/1666666 [48:43<1:44:29, 209.58it/s]

finished frames 2116800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 352940/1666666 [48:43<1:43:36, 211.33it/s]

finished frames 2117400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 353027/1666666 [48:44<1:46:35, 205.40it/s]

finished frames 2118000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 353135/1666666 [48:44<1:44:25, 209.66it/s]

finished frames 2118600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 353222/1666666 [48:44<1:44:18, 209.88it/s]

finished frames 2119200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 353332/1666666 [48:45<1:43:49, 210.82it/s]

finished frames 2119800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 353442/1666666 [48:46<1:43:22, 211.73it/s]

finished frames 2120400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 353530/1666666 [48:46<1:43:26, 211.58it/s]

finished frames 2121000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 353640/1666666 [48:46<1:43:39, 211.10it/s]

finished frames 2121600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 353727/1666666 [48:47<1:44:17, 209.81it/s]

finished frames 2122200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 353835/1666666 [48:47<1:44:10, 210.02it/s]

finished frames 2122800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 353923/1666666 [48:48<1:44:07, 210.12it/s]

finished frames 2123400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 354030/1666666 [48:48<1:46:56, 204.58it/s]

finished frames 2124000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██        | 354138/1666666 [48:49<1:44:28, 209.38it/s]

finished frames 2124600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██▏       | 354224/1666666 [48:49<1:44:20, 209.64it/s]

finished frames 2125200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██▏       | 354331/1666666 [48:50<1:47:23, 203.68it/s]

finished frames 2125800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██▏       | 354438/1666666 [48:50<1:44:31, 209.25it/s]

finished frames 2126400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██▏       | 354525/1666666 [48:51<1:44:09, 209.97it/s]

finished frames 2127000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██▏       | 354635/1666666 [48:51<1:43:49, 210.63it/s]

finished frames 2127600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██▏       | 354723/1666666 [48:52<1:43:55, 210.40it/s]

finished frames 2128200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██▏       | 354833/1666666 [48:52<1:44:00, 210.23it/s]

finished frames 2128800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██▏       | 354942/1666666 [48:53<1:43:58, 210.25it/s]

finished frames 2129400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██▏       | 355027/1666666 [48:53<1:46:45, 204.76it/s]

finished frames 2130000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██▏       | 355134/1666666 [48:54<1:44:21, 209.46it/s]

finished frames 2130600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██▏       | 355243/1666666 [48:54<1:43:51, 210.45it/s]

finished frames 2131200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██▏       | 355331/1666666 [48:55<1:43:56, 210.26it/s]

finished frames 2131800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██▏       | 355440/1666666 [48:55<1:43:47, 210.54it/s]

finished frames 2132400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██▏       | 355527/1666666 [48:55<1:43:42, 210.70it/s]

finished frames 2133000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██▏       | 355637/1666666 [48:56<1:43:39, 210.81it/s]

finished frames 2133600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██▏       | 355724/1666666 [48:56<1:43:47, 210.49it/s]

finished frames 2134200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██▏       | 355834/1666666 [48:57<1:43:39, 210.76it/s]

finished frames 2134800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██▏       | 355941/1666666 [48:57<1:44:19, 209.39it/s]

finished frames 2135400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██▏       | 356025/1666666 [48:58<1:46:36, 204.90it/s]

finished frames 2136000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██▏       | 356131/1666666 [48:58<1:44:45, 208.51it/s]

finished frames 2136600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██▏       | 356239/1666666 [48:59<1:44:04, 209.85it/s]

finished frames 2137200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██▏       | 356326/1666666 [48:59<1:43:47, 210.41it/s]

finished frames 2137800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██▏       | 356435/1666666 [49:00<1:43:49, 210.33it/s]

finished frames 2138400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██▏       | 356543/1666666 [49:00<1:43:53, 210.17it/s]

finished frames 2139000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██▏       | 356629/1666666 [49:01<1:44:40, 208.59it/s]

finished frames 2139600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██▏       | 356737/1666666 [49:01<1:44:09, 209.60it/s]

finished frames 2140200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██▏       | 356823/1666666 [49:02<1:44:18, 209.30it/s]

finished frames 2140800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██▏       | 356931/1666666 [49:02<1:43:46, 210.34it/s]

finished frames 2141400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██▏       | 357037/1666666 [49:03<1:46:11, 205.54it/s]

finished frames 2142000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██▏       | 357123/1666666 [49:03<1:44:38, 208.56it/s]

finished frames 2142600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██▏       | 357231/1666666 [49:04<1:43:42, 210.43it/s]

finished frames 2143200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██▏       | 357340/1666666 [49:04<1:43:37, 210.57it/s]

finished frames 2143800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██▏       | 357427/1666666 [49:05<1:43:23, 211.04it/s]

finished frames 2144400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██▏       | 357536/1666666 [49:05<1:43:38, 210.52it/s]

finished frames 2145000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██▏       | 357643/1666666 [49:06<1:44:06, 209.56it/s]

finished frames 2145600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██▏       | 357727/1666666 [49:06<1:44:41, 208.39it/s]

finished frames 2146200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██▏       | 357832/1666666 [49:07<1:45:03, 207.64it/s]

finished frames 2146800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██▏       | 357937/1666666 [49:07<1:44:44, 208.26it/s]

finished frames 2147400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██▏       | 358021/1666666 [49:07<1:48:11, 201.59it/s]

finished frames 2148000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██▏       | 358126/1666666 [49:08<1:45:20, 207.04it/s]

finished frames 2148600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 21%|██▏       | 358232/1666666 [49:08<1:44:36, 208.46it/s]

finished frames 2149200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 358337/1666666 [49:09<1:44:27, 208.76it/s]

finished frames 2149800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 358442/1666666 [49:09<1:44:17, 209.05it/s]

finished frames 2150400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 358527/1666666 [49:10<1:44:17, 209.04it/s]

finished frames 2151000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 358633/1666666 [49:10<1:44:11, 209.25it/s]

finished frames 2151600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 358739/1666666 [49:11<1:44:09, 209.29it/s]

finished frames 2152200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 358823/1666666 [49:11<1:44:43, 208.14it/s]

finished frames 2152800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 358928/1666666 [49:12<1:44:36, 208.34it/s]

finished frames 2153400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 359032/1666666 [49:12<1:50:02, 198.04it/s]

finished frames 2154000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 359137/1666666 [49:13<1:45:40, 206.22it/s]

finished frames 2154600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 359223/1666666 [49:13<1:44:21, 208.80it/s]

finished frames 2155200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 359328/1666666 [49:14<1:44:37, 208.25it/s]

finished frames 2155800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 359433/1666666 [49:14<1:44:35, 208.30it/s]

finished frames 2156400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 359539/1666666 [49:15<1:44:15, 208.95it/s]

finished frames 2157000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 359625/1666666 [49:15<1:43:55, 209.61it/s]

finished frames 2157600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 359730/1666666 [49:16<1:44:23, 208.66it/s]

finished frames 2158200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 359836/1666666 [49:16<1:44:06, 209.21it/s]

finished frames 2158800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 359941/1666666 [49:17<1:44:30, 208.38it/s]

finished frames 2159400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 360025/1666666 [49:17<1:46:43, 204.05it/s]

finished frames 2160000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 360131/1666666 [49:18<1:44:41, 208.01it/s]

finished frames 2160600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 360237/1666666 [49:18<1:44:06, 209.14it/s]

finished frames 2161200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 360322/1666666 [49:19<1:44:13, 208.90it/s]

finished frames 2161800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 360430/1666666 [49:19<1:43:58, 209.37it/s]

finished frames 2162400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 360537/1666666 [49:20<1:43:53, 209.54it/s]

finished frames 2163000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 360642/1666666 [49:20<1:44:12, 208.88it/s]

finished frames 2163600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 360727/1666666 [49:21<1:44:07, 209.02it/s]

finished frames 2164200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 360834/1666666 [49:21<1:43:56, 209.39it/s]

finished frames 2164800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 360940/1666666 [49:22<1:44:16, 208.69it/s]

finished frames 2165400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 361024/1666666 [49:22<1:46:36, 204.11it/s]

finished frames 2166000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 361131/1666666 [49:22<1:44:29, 208.23it/s]

finished frames 2166600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 361236/1666666 [49:23<1:44:21, 208.47it/s]

finished frames 2167200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 361321/1666666 [49:23<1:44:33, 208.09it/s]

finished frames 2167800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 361430/1666666 [49:24<1:52:18, 193.71it/s]

finished frames 2168400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 361537/1666666 [49:24<1:45:12, 206.76it/s]

finished frames 2169000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 361622/1666666 [49:25<1:44:31, 208.08it/s]

finished frames 2169600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 361728/1666666 [49:25<1:44:03, 209.02it/s]

finished frames 2170200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 361834/1666666 [49:26<1:43:59, 209.14it/s]

finished frames 2170800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 361941/1666666 [49:26<1:43:52, 209.35it/s]

finished frames 2171400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 362025/1666666 [49:27<1:46:55, 203.35it/s]

finished frames 2172000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 362133/1666666 [49:27<1:44:21, 208.35it/s]

finished frames 2172600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 362239/1666666 [49:28<1:43:51, 209.32it/s]

finished frames 2173200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 362323/1666666 [49:28<1:43:59, 209.04it/s]

finished frames 2173800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 362430/1666666 [49:29<1:43:50, 209.33it/s]

finished frames 2174400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 362535/1666666 [49:29<1:44:01, 208.94it/s]

finished frames 2175000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 362642/1666666 [49:30<1:43:49, 209.32it/s]

finished frames 2175600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 362726/1666666 [49:30<1:44:05, 208.78it/s]

finished frames 2176200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 362831/1666666 [49:31<1:44:01, 208.89it/s]

finished frames 2176800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 362937/1666666 [49:31<1:43:51, 209.22it/s]

finished frames 2177400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 363021/1666666 [49:32<1:47:29, 202.14it/s]

finished frames 2178000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 363126/1666666 [49:32<1:44:58, 206.95it/s]

finished frames 2178600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 363231/1666666 [49:33<1:44:10, 208.52it/s]

finished frames 2179200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 363337/1666666 [49:33<1:44:02, 208.77it/s]

finished frames 2179800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 363442/1666666 [49:34<1:44:11, 208.48it/s]

finished frames 2180400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 363526/1666666 [49:34<1:44:28, 207.89it/s]

finished frames 2181000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 363631/1666666 [49:35<1:44:08, 208.55it/s]

finished frames 2181600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 363736/1666666 [49:35<1:46:17, 204.31it/s]

finished frames 2182200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 363841/1666666 [49:36<1:44:32, 207.70it/s]

finished frames 2182800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 363926/1666666 [49:36<1:43:46, 209.23it/s]

finished frames 2183400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 364032/1666666 [49:36<1:46:05, 204.65it/s]

finished frames 2184000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 364140/1666666 [49:37<1:43:24, 209.95it/s]

finished frames 2184600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 364228/1666666 [49:37<1:42:59, 210.76it/s]

finished frames 2185200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 364337/1666666 [49:38<1:42:57, 210.81it/s]

finished frames 2185800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 364424/1666666 [49:38<1:42:58, 210.79it/s]

finished frames 2186400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 364534/1666666 [49:39<1:42:58, 210.74it/s]

finished frames 2187000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 364622/1666666 [49:39<1:43:07, 210.44it/s]

finished frames 2187600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 364731/1666666 [49:40<1:43:04, 210.50it/s]

finished frames 2188200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 364841/1666666 [49:40<1:42:37, 211.41it/s]

finished frames 2188800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 364928/1666666 [49:41<1:42:57, 210.73it/s]

finished frames 2189400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 365037/1666666 [49:41<1:45:41, 205.25it/s]

finished frames 2190000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 365123/1666666 [49:42<1:43:49, 208.94it/s]

finished frames 2190600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 365232/1666666 [49:42<1:43:01, 210.52it/s]

finished frames 2191200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 365341/1666666 [49:43<1:42:38, 211.29it/s]

finished frames 2191800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 365428/1666666 [49:43<1:42:49, 210.91it/s]

finished frames 2192400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 365535/1666666 [49:44<1:43:36, 209.31it/s]

finished frames 2193000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 365641/1666666 [49:44<1:43:42, 209.10it/s]

finished frames 2193600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 365725/1666666 [49:45<1:44:01, 208.42it/s]

finished frames 2194200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 365830/1666666 [49:45<1:44:02, 208.37it/s]

finished frames 2194800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 365936/1666666 [49:46<1:43:45, 208.93it/s]

finished frames 2195400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 366040/1666666 [49:46<1:50:29, 196.19it/s]

finished frames 2196000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 366125/1666666 [49:47<1:45:22, 205.70it/s]

finished frames 2196600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 366231/1666666 [49:47<1:44:54, 206.58it/s]

finished frames 2197200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 366337/1666666 [49:48<1:43:48, 208.77it/s]

finished frames 2197800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 366424/1666666 [49:48<1:43:00, 210.39it/s]

finished frames 2198400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 366534/1666666 [49:48<1:42:26, 211.52it/s]

finished frames 2199000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 366644/1666666 [49:49<1:41:25, 213.61it/s]

finished frames 2199600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 366732/1666666 [49:49<1:40:59, 214.54it/s]

finished frames 2200200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 366842/1666666 [49:50<1:41:09, 214.16it/s]

finished frames 2200800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 366930/1666666 [49:50<1:41:01, 214.43it/s]

finished frames 2201400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 367040/1666666 [49:51<1:43:31, 209.22it/s]

finished frames 2202000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 367127/1666666 [49:51<1:41:41, 212.98it/s]

finished frames 2202600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 367237/1666666 [49:52<1:41:09, 214.09it/s]

finished frames 2203200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 367325/1666666 [49:52<1:41:21, 213.67it/s]

finished frames 2203800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 367435/1666666 [49:53<1:41:07, 214.11it/s]

finished frames 2204400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 367523/1666666 [49:53<1:42:11, 211.87it/s]

finished frames 2205000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 367634/1666666 [49:54<1:39:22, 217.88it/s]

finished frames 2205600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 367722/1666666 [49:54<1:40:26, 215.54it/s]

finished frames 2206200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 367833/1666666 [49:55<1:42:06, 211.99it/s]

finished frames 2206800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 367921/1666666 [49:55<1:44:55, 206.30it/s]

finished frames 2207400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 368028/1666666 [49:56<1:45:48, 204.56it/s]

finished frames 2208000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 368133/1666666 [49:56<1:44:27, 207.17it/s]

finished frames 2208600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 368241/1666666 [49:57<1:42:11, 211.76it/s]

finished frames 2209200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 368329/1666666 [49:57<1:41:23, 213.40it/s]

finished frames 2209800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 368439/1666666 [49:57<1:42:00, 212.13it/s]

finished frames 2210400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 368527/1666666 [49:58<1:47:13, 201.78it/s]

finished frames 2211000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 368636/1666666 [49:58<1:42:21, 211.35it/s]

finished frames 2211600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 368724/1666666 [49:59<1:42:16, 211.51it/s]

finished frames 2212200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 368834/1666666 [49:59<1:42:17, 211.45it/s]

finished frames 2212800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 368922/1666666 [50:00<1:42:05, 211.85it/s]

finished frames 2213400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 369031/1666666 [50:00<1:44:42, 206.55it/s]

finished frames 2214000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 369140/1666666 [50:01<1:42:36, 210.75it/s]

finished frames 2214600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 369228/1666666 [50:01<1:42:03, 211.87it/s]

finished frames 2215200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 369338/1666666 [50:02<1:42:29, 210.97it/s]

finished frames 2215800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 369426/1666666 [50:02<1:42:03, 211.84it/s]

finished frames 2216400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 369536/1666666 [50:03<1:41:47, 212.38it/s]

finished frames 2217000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 369624/1666666 [50:03<1:41:45, 212.45it/s]

finished frames 2217600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 369734/1666666 [50:04<1:41:52, 212.19it/s]

finished frames 2218200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 369822/1666666 [50:04<1:42:06, 211.69it/s]

finished frames 2218800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 369932/1666666 [50:05<1:42:10, 211.51it/s]

finished frames 2219400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 370020/1666666 [50:05<1:45:00, 205.79it/s]

finished frames 2220000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 370128/1666666 [50:06<1:43:31, 208.72it/s]

finished frames 2220600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 370235/1666666 [50:06<1:43:12, 209.36it/s]

finished frames 2221200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 370343/1666666 [50:07<1:42:14, 211.32it/s]

finished frames 2221800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 370431/1666666 [50:07<1:41:54, 212.00it/s]

finished frames 2222400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 370541/1666666 [50:07<1:42:00, 211.76it/s]

finished frames 2223000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 370628/1666666 [50:08<1:42:40, 210.38it/s]

finished frames 2223600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 370714/1666666 [50:08<1:43:11, 209.32it/s]

finished frames 2224200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 370840/1666666 [50:09<1:46:05, 203.58it/s]

finished frames 2224800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 370924/1666666 [50:09<1:47:33, 200.78it/s]

finished frames 2225400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 371031/1666666 [50:10<1:44:56, 205.76it/s]

finished frames 2226000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 371140/1666666 [50:10<1:42:02, 211.60it/s]

finished frames 2226600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 371228/1666666 [50:11<1:40:42, 214.40it/s]

finished frames 2227200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 371338/1666666 [50:11<1:40:11, 215.49it/s]

finished frames 2227800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 371426/1666666 [50:12<1:40:07, 215.61it/s]

finished frames 2228400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 371536/1666666 [50:12<1:39:56, 215.97it/s]

finished frames 2229000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 371624/1666666 [50:13<1:40:15, 215.29it/s]

finished frames 2229600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 371734/1666666 [50:13<1:40:13, 215.35it/s]

finished frames 2230200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 371844/1666666 [50:14<1:40:01, 215.74it/s]

finished frames 2230800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 371932/1666666 [50:14<1:40:06, 215.57it/s]

finished frames 2231400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 372042/1666666 [50:15<1:42:26, 210.62it/s]

finished frames 2232000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 372130/1666666 [50:15<1:40:46, 214.11it/s]

finished frames 2232600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 372240/1666666 [50:16<1:40:19, 215.05it/s]

finished frames 2233200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 372328/1666666 [50:16<1:40:31, 214.59it/s]

finished frames 2233800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 372438/1666666 [50:16<1:40:17, 215.06it/s]

finished frames 2234400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 372526/1666666 [50:17<1:40:18, 215.03it/s]

finished frames 2235000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 372636/1666666 [50:17<1:40:05, 215.47it/s]

finished frames 2235600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 372724/1666666 [50:18<1:40:18, 215.00it/s]

finished frames 2236200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 372834/1666666 [50:18<1:40:17, 215.01it/s]

finished frames 2236800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 372944/1666666 [50:19<1:39:53, 215.85it/s]

finished frames 2237400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 373032/1666666 [50:19<1:42:08, 211.10it/s]

finished frames 2238000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 373142/1666666 [50:20<1:40:27, 214.61it/s]

finished frames 2238600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 373230/1666666 [50:20<1:40:38, 214.19it/s]

finished frames 2239200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 373340/1666666 [50:21<1:40:40, 214.12it/s]

finished frames 2239800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 373428/1666666 [50:21<1:40:16, 214.94it/s]

finished frames 2240400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 373538/1666666 [50:22<1:40:44, 213.92it/s]

finished frames 2241000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 373626/1666666 [50:22<1:40:41, 214.01it/s]

finished frames 2241600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 373736/1666666 [50:23<1:40:41, 214.02it/s]

finished frames 2242200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 373824/1666666 [50:23<1:40:35, 214.22it/s]

finished frames 2242800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 373934/1666666 [50:23<1:40:40, 214.02it/s]

finished frames 2243400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 374022/1666666 [50:24<1:43:55, 207.30it/s]

finished frames 2244000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 374132/1666666 [50:24<1:40:59, 213.32it/s]

finished frames 2244600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 374242/1666666 [50:25<1:40:19, 214.71it/s]

finished frames 2245200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 374330/1666666 [50:25<1:40:41, 213.92it/s]

finished frames 2245800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 374440/1666666 [50:26<1:40:06, 215.13it/s]

finished frames 2246400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 374528/1666666 [50:26<1:40:16, 214.76it/s]

finished frames 2247000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 374638/1666666 [50:27<1:39:58, 215.40it/s]

finished frames 2247600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 374726/1666666 [50:27<1:39:48, 215.74it/s]

finished frames 2248200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 374836/1666666 [50:28<1:40:04, 215.16it/s]

finished frames 2248800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 22%|██▏       | 374924/1666666 [50:28<1:39:57, 215.38it/s]

finished frames 2249400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 375034/1666666 [50:29<1:41:53, 211.27it/s]

finished frames 2250000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 375144/1666666 [50:29<1:40:04, 215.08it/s]

finished frames 2250600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 375232/1666666 [50:30<1:40:05, 215.04it/s]

finished frames 2251200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 375342/1666666 [50:30<1:39:58, 215.27it/s]

finished frames 2251800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 375430/1666666 [50:30<1:39:55, 215.36it/s]

finished frames 2252400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 375518/1666666 [50:31<1:40:07, 214.93it/s]

finished frames 2253000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 375628/1666666 [50:31<1:42:47, 209.34it/s]

finished frames 2253600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 375738/1666666 [50:32<1:43:18, 208.28it/s]

finished frames 2254200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 375825/1666666 [50:32<1:41:22, 212.22it/s]

finished frames 2254800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 375935/1666666 [50:33<1:40:40, 213.68it/s]

finished frames 2255400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 376023/1666666 [50:33<1:42:19, 210.22it/s]

finished frames 2256000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 376132/1666666 [50:34<1:42:56, 208.96it/s]

finished frames 2256600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 376242/1666666 [50:34<1:40:37, 213.72it/s]

finished frames 2257200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 376330/1666666 [50:35<1:40:27, 214.06it/s]

finished frames 2257800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 376440/1666666 [50:35<1:40:32, 213.88it/s]

finished frames 2258400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 376528/1666666 [50:36<1:40:01, 214.97it/s]

finished frames 2259000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 376638/1666666 [50:36<1:40:00, 214.97it/s]

finished frames 2259600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 376726/1666666 [50:37<1:40:06, 214.77it/s]

finished frames 2260200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 376836/1666666 [50:37<1:40:14, 214.47it/s]

finished frames 2260800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 376924/1666666 [50:38<1:40:27, 213.99it/s]

finished frames 2261400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 377034/1666666 [50:38<1:42:30, 209.67it/s]

finished frames 2262000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 377143/1666666 [50:39<1:40:16, 214.34it/s]

finished frames 2262600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 377231/1666666 [50:39<1:40:09, 214.55it/s]

finished frames 2263200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 377341/1666666 [50:40<1:39:54, 215.10it/s]

finished frames 2263800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 377429/1666666 [50:40<1:39:58, 214.93it/s]

finished frames 2264400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 377539/1666666 [50:40<1:39:34, 215.76it/s]

finished frames 2265000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 377627/1666666 [50:41<1:39:59, 214.84it/s]

finished frames 2265600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 377737/1666666 [50:41<1:39:52, 215.09it/s]

finished frames 2266200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 377825/1666666 [50:42<1:40:09, 214.45it/s]

finished frames 2266800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 377935/1666666 [50:42<1:48:33, 197.87it/s]

finished frames 2267400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 378023/1666666 [50:43<1:44:39, 205.22it/s]

finished frames 2268000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 378132/1666666 [50:43<1:42:54, 208.69it/s]

finished frames 2268600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 378242/1666666 [50:44<1:40:10, 214.37it/s]

finished frames 2269200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 378330/1666666 [50:44<1:40:00, 214.70it/s]

finished frames 2269800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 378440/1666666 [50:45<1:39:53, 214.95it/s]

finished frames 2270400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 378528/1666666 [50:45<1:39:51, 214.99it/s]

finished frames 2271000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 378638/1666666 [50:46<1:39:30, 215.74it/s]

finished frames 2271600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 378726/1666666 [50:46<1:39:28, 215.78it/s]

finished frames 2272200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 378836/1666666 [50:47<1:39:54, 214.84it/s]

finished frames 2272800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 378924/1666666 [50:47<1:39:23, 215.94it/s]

finished frames 2273400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 379034/1666666 [50:47<1:42:17, 209.81it/s]

finished frames 2274000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 379144/1666666 [50:48<1:40:31, 213.46it/s]

finished frames 2274600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 379232/1666666 [50:48<1:40:08, 214.26it/s]

finished frames 2275200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 379342/1666666 [50:49<1:40:11, 214.16it/s]

finished frames 2275800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 379430/1666666 [50:49<1:40:16, 213.96it/s]

finished frames 2276400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 379540/1666666 [50:50<1:40:09, 214.18it/s]

finished frames 2277000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 379628/1666666 [50:50<1:40:11, 214.08it/s]

finished frames 2277600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 379738/1666666 [50:51<1:40:15, 213.94it/s]

finished frames 2278200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 379826/1666666 [50:51<1:40:21, 213.72it/s]

finished frames 2278800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 379936/1666666 [50:52<1:40:12, 214.00it/s]

finished frames 2279400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 380024/1666666 [50:52<1:42:12, 209.82it/s]

finished frames 2280000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 380134/1666666 [50:53<1:40:02, 214.35it/s]

finished frames 2280600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 380244/1666666 [50:53<1:39:24, 215.66it/s]

finished frames 2281200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 380332/1666666 [50:54<1:48:17, 197.98it/s]

finished frames 2281800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 380442/1666666 [50:54<1:41:15, 211.72it/s]

finished frames 2282400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 380530/1666666 [50:55<1:40:23, 213.53it/s]

finished frames 2283000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 380640/1666666 [50:55<1:40:05, 214.13it/s]

finished frames 2283600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 380728/1666666 [50:55<1:40:02, 214.23it/s]

finished frames 2284200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 380838/1666666 [50:56<1:39:56, 214.42it/s]

finished frames 2284800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 380926/1666666 [50:56<1:40:00, 214.27it/s]

finished frames 2285400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 381036/1666666 [50:57<1:42:11, 209.66it/s]

finished frames 2286000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 381123/1666666 [50:57<1:40:29, 213.21it/s]

finished frames 2286600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 381233/1666666 [50:58<1:39:51, 214.53it/s]

finished frames 2287200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 381343/1666666 [50:58<1:39:46, 214.71it/s]

finished frames 2287800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 381431/1666666 [50:59<1:39:54, 214.42it/s]

finished frames 2288400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 381541/1666666 [50:59<1:39:56, 214.32it/s]

finished frames 2289000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 381629/1666666 [51:00<1:40:00, 214.15it/s]

finished frames 2289600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 381739/1666666 [51:00<1:39:33, 215.11it/s]

finished frames 2290200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 381827/1666666 [51:01<1:39:48, 214.54it/s]

finished frames 2290800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 381937/1666666 [51:01<1:39:58, 214.19it/s]

finished frames 2291400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 382025/1666666 [51:02<1:42:20, 209.21it/s]

finished frames 2292000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 382135/1666666 [51:02<1:40:27, 213.12it/s]

finished frames 2292600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 382223/1666666 [51:02<1:40:08, 213.76it/s]

finished frames 2293200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 382333/1666666 [51:03<1:39:24, 215.33it/s]

finished frames 2293800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 382443/1666666 [51:03<1:39:20, 215.47it/s]

finished frames 2294400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 382531/1666666 [51:04<1:39:38, 214.79it/s]

finished frames 2295000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 382641/1666666 [51:04<1:38:43, 216.76it/s]

finished frames 2295600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 382729/1666666 [51:05<1:56:26, 183.77it/s]

finished frames 2296200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 382839/1666666 [51:05<1:41:28, 210.87it/s]

finished frames 2296800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 382927/1666666 [51:06<1:39:43, 214.56it/s]

finished frames 2297400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 383037/1666666 [51:06<1:41:11, 211.43it/s]

finished frames 2298000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 383125/1666666 [51:07<1:39:31, 214.95it/s]

finished frames 2298600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 383235/1666666 [51:07<1:38:48, 216.50it/s]

finished frames 2299200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 383323/1666666 [51:08<1:39:32, 214.87it/s]

finished frames 2299800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 383433/1666666 [51:08<1:39:31, 214.88it/s]

finished frames 2300400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 383543/1666666 [51:09<1:39:29, 214.95it/s]

finished frames 2301000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 383631/1666666 [51:09<1:39:39, 214.56it/s]

finished frames 2301600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 383741/1666666 [51:10<1:39:38, 214.58it/s]

finished frames 2302200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 383829/1666666 [51:10<1:39:39, 214.54it/s]

finished frames 2302800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 383939/1666666 [51:11<1:41:39, 210.31it/s]

finished frames 2303400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 384024/1666666 [51:11<1:44:31, 204.51it/s]

finished frames 2304000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 384132/1666666 [51:11<1:42:22, 208.78it/s]

finished frames 2304600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 384239/1666666 [51:12<1:41:56, 209.66it/s]

finished frames 2305200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 384324/1666666 [51:12<1:42:10, 209.16it/s]

finished frames 2305800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 384430/1666666 [51:13<1:42:19, 208.85it/s]

finished frames 2306400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 384536/1666666 [51:13<1:42:11, 209.09it/s]

finished frames 2307000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 384643/1666666 [51:14<1:42:12, 209.06it/s]

finished frames 2307600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 384729/1666666 [51:14<1:41:57, 209.57it/s]

finished frames 2308200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 384837/1666666 [51:15<1:41:49, 209.81it/s]

finished frames 2308800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 384922/1666666 [51:15<1:42:03, 209.31it/s]

finished frames 2309400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 385028/1666666 [51:16<1:44:28, 204.47it/s]

finished frames 2310000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 385135/1666666 [51:16<1:42:18, 208.76it/s]

finished frames 2310600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 385221/1666666 [51:17<1:51:11, 192.07it/s]

finished frames 2311200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 385326/1666666 [51:17<1:43:54, 205.51it/s]

finished frames 2311800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 385433/1666666 [51:18<1:42:00, 209.33it/s]

finished frames 2312400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 385539/1666666 [51:18<1:42:07, 209.06it/s]

finished frames 2313000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 385624/1666666 [51:19<1:42:13, 208.85it/s]

finished frames 2313600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 385730/1666666 [51:19<1:42:02, 209.22it/s]

finished frames 2314200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 385836/1666666 [51:20<1:42:06, 209.05it/s]

finished frames 2314800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 385941/1666666 [51:20<1:42:19, 208.62it/s]

finished frames 2315400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 386025/1666666 [51:21<1:44:56, 203.39it/s]

finished frames 2316000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 386131/1666666 [51:21<1:42:21, 208.49it/s]

finished frames 2316600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 386239/1666666 [51:22<1:40:47, 211.72it/s]

finished frames 2317200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 386327/1666666 [51:22<1:40:56, 211.40it/s]

finished frames 2317800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 386437/1666666 [51:23<1:39:51, 213.67it/s]

finished frames 2318400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 386525/1666666 [51:23<1:38:57, 215.60it/s]

finished frames 2319000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 386635/1666666 [51:23<1:39:11, 215.08it/s]

finished frames 2319600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 386723/1666666 [51:24<1:38:31, 216.52it/s]

finished frames 2320200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 386833/1666666 [51:24<1:38:34, 216.38it/s]

finished frames 2320800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 386943/1666666 [51:25<1:38:36, 216.29it/s]

finished frames 2321400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 387031/1666666 [51:25<1:40:54, 211.34it/s]

finished frames 2322000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 387141/1666666 [51:26<1:39:04, 215.23it/s]

finished frames 2322600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 387229/1666666 [51:26<1:39:20, 214.65it/s]

finished frames 2323200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 387339/1666666 [51:27<1:38:53, 215.60it/s]

finished frames 2323800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 387427/1666666 [51:27<1:38:35, 216.24it/s]

finished frames 2324400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 387537/1666666 [51:28<1:41:43, 209.58it/s]

finished frames 2325000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 387625/1666666 [51:28<1:45:34, 201.93it/s]

finished frames 2325600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 387735/1666666 [51:29<1:39:53, 213.37it/s]

finished frames 2326200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 387823/1666666 [51:29<1:39:20, 214.54it/s]

finished frames 2326800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 387933/1666666 [51:30<1:39:14, 214.74it/s]

finished frames 2327400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 388021/1666666 [51:30<1:42:10, 208.58it/s]

finished frames 2328000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 388131/1666666 [51:31<1:39:41, 213.73it/s]

finished frames 2328600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 388241/1666666 [51:31<1:39:07, 214.94it/s]

finished frames 2329200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 388329/1666666 [51:31<1:39:18, 214.52it/s]

finished frames 2329800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 388439/1666666 [51:32<1:39:08, 214.87it/s]

finished frames 2330400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 388527/1666666 [51:32<1:38:42, 215.80it/s]

finished frames 2331000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 388637/1666666 [51:33<1:38:25, 216.41it/s]

finished frames 2331600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 388725/1666666 [51:33<1:38:15, 216.78it/s]

finished frames 2332200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 388835/1666666 [51:34<1:38:07, 217.05it/s]

finished frames 2332800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 388923/1666666 [51:34<1:38:07, 217.04it/s]

finished frames 2333400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 389033/1666666 [51:35<1:40:00, 212.93it/s]

finished frames 2334000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 389143/1666666 [51:35<1:38:12, 216.80it/s]

finished frames 2334600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 389231/1666666 [51:36<1:38:14, 216.71it/s]

finished frames 2335200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 389341/1666666 [51:36<1:37:47, 217.70it/s]

finished frames 2335800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 389429/1666666 [51:37<1:38:02, 217.13it/s]

finished frames 2336400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 389539/1666666 [51:37<1:37:54, 217.39it/s]

finished frames 2337000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 389627/1666666 [51:37<1:38:24, 216.29it/s]

finished frames 2337600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 389737/1666666 [51:38<1:37:42, 217.81it/s]

finished frames 2338200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 389825/1666666 [51:38<1:38:25, 216.22it/s]

finished frames 2338800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 389935/1666666 [51:39<1:42:49, 206.93it/s]

finished frames 2339400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 390023/1666666 [51:39<1:41:28, 209.68it/s]

finished frames 2340000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 390133/1666666 [51:40<1:38:45, 215.42it/s]

finished frames 2340600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 390243/1666666 [51:40<1:38:17, 216.45it/s]

finished frames 2341200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 390331/1666666 [51:41<1:38:49, 215.26it/s]

finished frames 2341800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 390441/1666666 [51:41<1:38:10, 216.67it/s]

finished frames 2342400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 390529/1666666 [51:42<1:38:21, 216.24it/s]

finished frames 2343000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 390639/1666666 [51:42<1:38:09, 216.65it/s]

finished frames 2343600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 390727/1666666 [51:43<1:38:15, 216.43it/s]

finished frames 2344200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 390837/1666666 [51:43<1:38:00, 216.95it/s]

finished frames 2344800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 390925/1666666 [51:44<1:38:10, 216.57it/s]

finished frames 2345400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 391035/1666666 [51:44<1:40:15, 212.07it/s]

finished frames 2346000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 391123/1666666 [51:44<1:38:39, 215.46it/s]

finished frames 2346600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 391233/1666666 [51:45<1:38:17, 216.25it/s]

finished frames 2347200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 391343/1666666 [51:45<1:38:21, 216.09it/s]

finished frames 2347800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 391431/1666666 [51:46<1:38:26, 215.90it/s]

finished frames 2348400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 391541/1666666 [51:46<1:37:56, 216.99it/s]

finished frames 2349000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 23%|██▎       | 391629/1666666 [51:47<1:38:20, 216.09it/s]

finished frames 2349600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▎       | 391739/1666666 [51:47<1:39:11, 214.23it/s]

finished frames 2350200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▎       | 391827/1666666 [51:48<1:39:16, 214.02it/s]

finished frames 2350800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▎       | 391937/1666666 [51:48<1:38:54, 214.79it/s]

finished frames 2351400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▎       | 392025/1666666 [51:49<1:41:28, 209.36it/s]

finished frames 2352000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▎       | 392133/1666666 [51:49<1:40:04, 212.27it/s]

finished frames 2352600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▎       | 392243/1666666 [51:50<1:39:59, 212.43it/s]

finished frames 2353200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▎       | 392331/1666666 [51:50<1:46:45, 198.94it/s]

finished frames 2353800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▎       | 392418/1666666 [51:51<2:00:13, 176.65it/s]

finished frames 2354400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▎       | 392527/1666666 [51:51<1:43:22, 205.42it/s]

finished frames 2355000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▎       | 392636/1666666 [51:52<1:40:37, 211.03it/s]

finished frames 2355600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▎       | 392724/1666666 [51:52<1:39:52, 212.59it/s]

finished frames 2356200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▎       | 392834/1666666 [51:53<1:39:51, 212.60it/s]

finished frames 2356800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▎       | 392922/1666666 [51:53<1:39:44, 212.85it/s]

finished frames 2357400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▎       | 393032/1666666 [51:54<1:42:49, 206.44it/s]

finished frames 2358000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▎       | 393141/1666666 [51:54<1:41:44, 208.61it/s]

finished frames 2358600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▎       | 393227/1666666 [51:54<1:42:35, 206.89it/s]

finished frames 2359200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▎       | 393336/1666666 [51:55<1:41:44, 208.60it/s]

finished frames 2359800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▎       | 393443/1666666 [51:55<1:41:36, 208.84it/s]

finished frames 2360400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▎       | 393527/1666666 [51:56<1:42:06, 207.79it/s]

finished frames 2361000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▎       | 393633/1666666 [51:56<1:41:41, 208.63it/s]

finished frames 2361600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▎       | 393743/1666666 [51:57<1:39:31, 213.17it/s]

finished frames 2362200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▎       | 393830/1666666 [51:57<1:43:24, 205.15it/s]

finished frames 2362800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▎       | 393939/1666666 [51:58<1:40:47, 210.44it/s]

finished frames 2363400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▎       | 394027/1666666 [51:58<1:42:17, 207.36it/s]

finished frames 2364000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▎       | 394136/1666666 [51:59<1:40:25, 211.19it/s]

finished frames 2364600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▎       | 394224/1666666 [51:59<1:40:00, 212.05it/s]

finished frames 2365200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▎       | 394334/1666666 [52:00<1:39:57, 212.13it/s]

finished frames 2365800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▎       | 394422/1666666 [52:00<1:39:44, 212.58it/s]

finished frames 2366400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▎       | 394532/1666666 [52:01<1:39:53, 212.27it/s]

finished frames 2367000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▎       | 394642/1666666 [52:01<1:39:40, 212.71it/s]

finished frames 2367600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▎       | 394730/1666666 [52:02<1:42:45, 206.30it/s]

finished frames 2368200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▎       | 394838/1666666 [52:02<1:46:13, 199.55it/s]

finished frames 2368800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▎       | 394925/1666666 [52:03<1:41:22, 209.07it/s]

finished frames 2369400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▎       | 395035/1666666 [52:03<1:42:37, 206.53it/s]

finished frames 2370000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▎       | 395144/1666666 [52:04<1:40:07, 211.67it/s]

finished frames 2370600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▎       | 395232/1666666 [52:04<1:40:00, 211.90it/s]

finished frames 2371200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▎       | 395342/1666666 [52:05<1:39:36, 212.73it/s]

finished frames 2371800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▎       | 395430/1666666 [52:05<1:39:59, 211.90it/s]

finished frames 2372400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▎       | 395540/1666666 [52:06<1:39:37, 212.67it/s]

finished frames 2373000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▎       | 395628/1666666 [52:06<1:40:00, 211.83it/s]

finished frames 2373600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▎       | 395738/1666666 [52:06<1:39:54, 212.02it/s]

finished frames 2374200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▎       | 395826/1666666 [52:07<1:40:16, 211.23it/s]

finished frames 2374800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 395936/1666666 [52:07<1:40:19, 211.10it/s]

finished frames 2375400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 396024/1666666 [52:08<1:41:49, 207.99it/s]

finished frames 2376000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 396133/1666666 [52:08<1:40:08, 211.44it/s]

finished frames 2376600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 396243/1666666 [52:09<1:39:37, 212.55it/s]

finished frames 2377200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 396331/1666666 [52:09<1:39:51, 212.03it/s]

finished frames 2377800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 396441/1666666 [52:10<1:39:41, 212.35it/s]

finished frames 2378400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 396529/1666666 [52:10<1:39:18, 213.17it/s]

finished frames 2379000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 396639/1666666 [52:11<1:39:34, 212.57it/s]

finished frames 2379600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 396727/1666666 [52:11<1:39:54, 211.84it/s]

finished frames 2380200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 396837/1666666 [52:12<1:39:41, 212.31it/s]

finished frames 2380800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 396925/1666666 [52:12<1:39:18, 213.11it/s]

finished frames 2381400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 397034/1666666 [52:13<1:41:46, 207.91it/s]

finished frames 2382000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 397121/1666666 [52:13<1:40:32, 210.45it/s]

finished frames 2382600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 397231/1666666 [52:14<1:44:16, 202.89it/s]

finished frames 2383200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 397340/1666666 [52:14<1:40:45, 209.95it/s]

finished frames 2383800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 397428/1666666 [52:15<1:40:15, 210.99it/s]

finished frames 2384400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 397538/1666666 [52:15<1:39:53, 211.76it/s]

finished frames 2385000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 397626/1666666 [52:15<1:39:49, 211.88it/s]

finished frames 2385600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 397736/1666666 [52:16<1:39:41, 212.13it/s]

finished frames 2386200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 397824/1666666 [52:16<1:40:01, 211.42it/s]

finished frames 2386800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 397934/1666666 [52:17<1:39:59, 211.47it/s]

finished frames 2387400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 398022/1666666 [52:17<1:42:55, 205.42it/s]

finished frames 2388000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 398131/1666666 [52:18<1:40:27, 210.46it/s]

finished frames 2388600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 398241/1666666 [52:18<1:39:52, 211.66it/s]

finished frames 2389200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 398329/1666666 [52:19<1:39:54, 211.57it/s]

finished frames 2389800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 398439/1666666 [52:19<1:39:48, 211.77it/s]

finished frames 2390400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 398527/1666666 [52:20<1:39:53, 211.59it/s]

finished frames 2391000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 398637/1666666 [52:20<1:39:50, 211.68it/s]

finished frames 2391600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 398725/1666666 [52:21<1:39:31, 212.35it/s]

finished frames 2392200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 398835/1666666 [52:21<1:39:38, 212.07it/s]

finished frames 2392800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 398923/1666666 [52:22<1:39:58, 211.33it/s]

finished frames 2393400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 399033/1666666 [52:22<1:41:44, 207.65it/s]

finished frames 2394000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 399142/1666666 [52:23<1:40:06, 211.02it/s]

finished frames 2394600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 399230/1666666 [52:23<1:39:47, 211.66it/s]

finished frames 2395200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 399340/1666666 [52:24<1:40:13, 210.74it/s]

finished frames 2395800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 399427/1666666 [52:24<1:44:22, 202.37it/s]

finished frames 2396400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 399534/1666666 [52:25<1:46:56, 197.49it/s]

finished frames 2397000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 399643/1666666 [52:25<1:40:57, 209.16it/s]

finished frames 2397600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 399730/1666666 [52:26<1:40:04, 211.00it/s]

finished frames 2398200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 399840/1666666 [52:26<1:39:46, 211.61it/s]

finished frames 2398800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 399928/1666666 [52:26<1:39:23, 212.42it/s]

finished frames 2399400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 400038/1666666 [52:27<1:41:38, 207.70it/s]

finished frames 2400000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 400122/1666666 [52:27<1:41:44, 207.47it/s]

finished frames 2400600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 400228/1666666 [52:28<1:41:05, 208.80it/s]

finished frames 2401200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 400335/1666666 [52:28<1:40:37, 209.73it/s]

finished frames 2401800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 400422/1666666 [52:29<1:40:27, 210.06it/s]

finished frames 2402400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 400532/1666666 [52:29<1:40:22, 210.24it/s]

finished frames 2403000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 400640/1666666 [52:30<1:40:30, 209.95it/s]

finished frames 2403600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 400726/1666666 [52:30<1:40:33, 209.81it/s]

finished frames 2404200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 400832/1666666 [52:31<1:40:37, 209.67it/s]

finished frames 2404800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 400941/1666666 [52:31<1:40:23, 210.13it/s]

finished frames 2405400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 401026/1666666 [52:32<1:42:46, 205.24it/s]

finished frames 2406000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 401132/1666666 [52:32<1:40:55, 209.01it/s]

finished frames 2406600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 401240/1666666 [52:33<1:40:24, 210.05it/s]

finished frames 2407200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 401327/1666666 [52:33<1:40:19, 210.22it/s]

finished frames 2407800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 401433/1666666 [52:34<1:40:51, 209.07it/s]

finished frames 2408400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 401539/1666666 [52:34<1:40:48, 209.16it/s]

finished frames 2409000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 401624/1666666 [52:35<1:40:50, 209.09it/s]

finished frames 2409600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 401730/1666666 [52:35<1:58:02, 178.59it/s]

finished frames 2410200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 401836/1666666 [52:36<1:43:44, 203.21it/s]

finished frames 2410800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 401941/1666666 [52:36<1:41:28, 207.72it/s]

finished frames 2411400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 402025/1666666 [52:37<1:43:26, 203.75it/s]

finished frames 2412000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 402130/1666666 [52:37<1:41:41, 207.25it/s]

finished frames 2412600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 402236/1666666 [52:38<1:41:00, 208.62it/s]

finished frames 2413200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 402341/1666666 [52:38<1:40:52, 208.91it/s]

finished frames 2413800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 402426/1666666 [52:39<1:40:55, 208.77it/s]

finished frames 2414400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 402532/1666666 [52:39<1:40:47, 209.04it/s]

finished frames 2415000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 402638/1666666 [52:40<1:40:47, 209.03it/s]

finished frames 2415600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 402722/1666666 [52:40<1:40:51, 208.88it/s]

finished frames 2416200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 402827/1666666 [52:40<1:40:57, 208.65it/s]

finished frames 2416800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 402932/1666666 [52:41<1:41:13, 208.09it/s]

finished frames 2417400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 403038/1666666 [52:41<1:42:56, 204.59it/s]

finished frames 2418000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 403125/1666666 [52:42<1:39:04, 212.54it/s]

finished frames 2418600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 403235/1666666 [52:42<1:37:51, 215.18it/s]

finished frames 2419200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 403323/1666666 [52:43<1:39:24, 211.79it/s]

finished frames 2419800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 403433/1666666 [52:43<1:39:14, 212.15it/s]

finished frames 2420400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 403543/1666666 [52:44<1:38:13, 214.33it/s]

finished frames 2421000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 403631/1666666 [52:44<1:38:08, 214.50it/s]

finished frames 2421600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 403741/1666666 [52:45<1:38:41, 213.28it/s]

finished frames 2422200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 403829/1666666 [52:45<1:39:08, 212.31it/s]

finished frames 2422800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 403939/1666666 [52:46<1:38:27, 213.76it/s]

finished frames 2423400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 404027/1666666 [52:46<1:40:28, 209.45it/s]

finished frames 2424000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 404135/1666666 [52:47<1:45:46, 198.93it/s]

finished frames 2424600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 404222/1666666 [52:47<1:46:14, 198.05it/s]

finished frames 2425200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 404331/1666666 [52:48<1:39:15, 211.96it/s]

finished frames 2425800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 404441/1666666 [52:48<1:37:42, 215.31it/s]

finished frames 2426400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 404529/1666666 [52:49<1:37:33, 215.63it/s]

finished frames 2427000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 404639/1666666 [52:49<1:37:09, 216.50it/s]

finished frames 2427600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 404727/1666666 [52:49<1:37:21, 216.01it/s]

finished frames 2428200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 404837/1666666 [52:50<1:37:20, 216.04it/s]

finished frames 2428800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 404925/1666666 [52:50<1:37:07, 216.50it/s]

finished frames 2429400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 405035/1666666 [52:51<1:39:24, 211.53it/s]

finished frames 2430000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 405123/1666666 [52:51<1:37:38, 215.32it/s]

finished frames 2430600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 405233/1666666 [52:52<1:36:50, 217.10it/s]

finished frames 2431200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 405343/1666666 [52:52<1:36:56, 216.84it/s]

finished frames 2431800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 405431/1666666 [52:53<1:37:05, 216.50it/s]

finished frames 2432400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 405541/1666666 [52:53<1:37:17, 216.02it/s]

finished frames 2433000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 405629/1666666 [52:54<1:37:32, 215.48it/s]

finished frames 2433600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 405739/1666666 [52:54<1:37:16, 216.04it/s]

finished frames 2434200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 405827/1666666 [52:55<1:37:17, 215.97it/s]

finished frames 2434800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 405937/1666666 [52:55<1:39:35, 210.99it/s]

finished frames 2435400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 406022/1666666 [52:55<1:42:21, 205.25it/s]

finished frames 2436000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 406129/1666666 [52:56<1:40:31, 208.99it/s]

finished frames 2436600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 406236/1666666 [52:57<1:40:13, 209.60it/s]

finished frames 2437200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 406342/1666666 [52:57<1:40:14, 209.54it/s]

finished frames 2437800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 406428/1666666 [52:57<1:40:04, 209.88it/s]

finished frames 2438400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 406534/1666666 [52:58<1:46:30, 197.18it/s]

finished frames 2439000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 406639/1666666 [52:59<1:41:42, 206.48it/s]

finished frames 2439600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 406723/1666666 [52:59<1:41:25, 207.03it/s]

finished frames 2440200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 406829/1666666 [52:59<1:40:45, 208.39it/s]

finished frames 2440800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 406935/1666666 [53:00<1:40:21, 209.22it/s]

finished frames 2441400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 407040/1666666 [53:00<1:42:26, 204.95it/s]

finished frames 2442000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 407127/1666666 [53:01<1:40:24, 209.08it/s]

finished frames 2442600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 407235/1666666 [53:01<1:39:49, 210.29it/s]

finished frames 2443200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 407343/1666666 [53:02<1:40:00, 209.87it/s]

finished frames 2443800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 407430/1666666 [53:02<1:39:34, 210.77it/s]

finished frames 2444400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 407538/1666666 [53:03<1:39:52, 210.12it/s]

finished frames 2445000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 407626/1666666 [53:03<1:39:48, 210.24it/s]

finished frames 2445600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 407735/1666666 [53:04<1:39:57, 209.92it/s]

finished frames 2446200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 407822/1666666 [53:04<1:39:49, 210.16it/s]

finished frames 2446800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 407932/1666666 [53:05<1:39:46, 210.26it/s]

finished frames 2447400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 408020/1666666 [53:05<1:42:24, 204.83it/s]

finished frames 2448000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 408127/1666666 [53:06<1:40:26, 208.82it/s]

finished frames 2448600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 24%|██▍       | 408234/1666666 [53:06<1:39:49, 210.09it/s]

finished frames 2449200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 408342/1666666 [53:07<1:39:48, 210.12it/s]

finished frames 2449800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 408429/1666666 [53:07<1:39:45, 210.22it/s]

finished frames 2450400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 408538/1666666 [53:08<1:39:30, 210.71it/s]

finished frames 2451000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 408624/1666666 [53:08<1:39:48, 210.08it/s]

finished frames 2451600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 408731/1666666 [53:09<1:40:08, 209.34it/s]

finished frames 2452200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 408816/1666666 [53:09<1:49:08, 192.09it/s]

finished frames 2452800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 408923/1666666 [53:09<1:41:29, 206.55it/s]

finished frames 2453400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 409029/1666666 [53:10<1:44:42, 200.19it/s]

finished frames 2454000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 409134/1666666 [53:11<1:41:10, 207.15it/s]

finished frames 2454600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 409239/1666666 [53:11<1:40:18, 208.91it/s]

finished frames 2455200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 409324/1666666 [53:11<1:40:09, 209.22it/s]

finished frames 2455800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 409430/1666666 [53:12<1:40:16, 208.97it/s]

finished frames 2456400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 409536/1666666 [53:12<1:40:12, 209.07it/s]

finished frames 2457000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 409642/1666666 [53:13<1:40:14, 209.01it/s]

finished frames 2457600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 409727/1666666 [53:13<1:40:24, 208.64it/s]

finished frames 2458200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 409835/1666666 [53:14<1:39:44, 210.03it/s]

finished frames 2458800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 409943/1666666 [53:14<1:39:42, 210.05it/s]

finished frames 2459400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 410028/1666666 [53:15<1:42:54, 203.52it/s]

finished frames 2460000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 410136/1666666 [53:15<1:40:24, 208.56it/s]

finished frames 2460600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 410243/1666666 [53:16<1:40:03, 209.27it/s]

finished frames 2461200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 410327/1666666 [53:16<1:40:18, 208.74it/s]

finished frames 2461800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 410432/1666666 [53:17<1:40:19, 208.69it/s]

finished frames 2462400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 410538/1666666 [53:17<1:40:00, 209.32it/s]

finished frames 2463000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 410643/1666666 [53:18<1:40:13, 208.88it/s]

finished frames 2463600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 410727/1666666 [53:18<1:40:10, 208.94it/s]

finished frames 2464200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 410832/1666666 [53:19<1:40:11, 208.91it/s]

finished frames 2464800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 410938/1666666 [53:19<1:40:13, 208.80it/s]

finished frames 2465400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 411022/1666666 [53:20<1:42:27, 204.25it/s]

finished frames 2466000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 411129/1666666 [53:20<1:40:28, 208.26it/s]

finished frames 2466600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 411237/1666666 [53:21<1:41:52, 205.38it/s]

finished frames 2467200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 411343/1666666 [53:21<1:42:30, 204.11it/s]

finished frames 2467800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 411429/1666666 [53:22<1:40:05, 209.01it/s]

finished frames 2468400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 411537/1666666 [53:22<1:39:28, 210.28it/s]

finished frames 2469000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 411622/1666666 [53:23<1:39:49, 209.55it/s]

finished frames 2469600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 411729/1666666 [53:23<1:39:50, 209.48it/s]

finished frames 2470200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 411839/1666666 [53:24<1:38:56, 211.38it/s]

finished frames 2470800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 411927/1666666 [53:24<1:38:57, 211.32it/s]

finished frames 2471400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 412036/1666666 [53:25<1:41:42, 205.60it/s]

finished frames 2472000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 412123/1666666 [53:25<1:39:35, 209.95it/s]

finished frames 2472600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 412233/1666666 [53:25<1:39:07, 210.91it/s]

finished frames 2473200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 412343/1666666 [53:26<1:39:01, 211.12it/s]

finished frames 2473800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 412431/1666666 [53:26<1:38:59, 211.18it/s]

finished frames 2474400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 412541/1666666 [53:27<1:38:59, 211.13it/s]

finished frames 2475000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 412629/1666666 [53:27<1:39:08, 210.83it/s]

finished frames 2475600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 412739/1666666 [53:28<1:38:53, 211.33it/s]

finished frames 2476200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 412827/1666666 [53:28<1:38:58, 211.15it/s]

finished frames 2476800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 412936/1666666 [53:29<1:38:57, 211.15it/s]

finished frames 2477400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 413024/1666666 [53:29<1:41:02, 206.77it/s]

finished frames 2478000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 413131/1666666 [53:30<1:39:22, 210.23it/s]

finished frames 2478600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 413240/1666666 [53:30<1:38:59, 211.02it/s]

finished frames 2479200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 413328/1666666 [53:31<1:38:48, 211.40it/s]

finished frames 2479800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 413438/1666666 [53:31<1:38:36, 211.82it/s]

finished frames 2480400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 413526/1666666 [53:32<1:45:09, 198.61it/s]

finished frames 2481000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 413633/1666666 [53:32<1:40:18, 208.18it/s]

finished frames 2481600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 413739/1666666 [53:33<1:40:06, 208.58it/s]

finished frames 2482200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 413824/1666666 [53:33<1:39:47, 209.23it/s]

finished frames 2482800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 413931/1666666 [53:34<1:39:17, 210.29it/s]

finished frames 2483400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 414039/1666666 [53:34<1:40:57, 206.78it/s]

finished frames 2484000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 414126/1666666 [53:35<1:39:15, 210.31it/s]

finished frames 2484600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 414236/1666666 [53:35<1:38:39, 211.57it/s]

finished frames 2485200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 414324/1666666 [53:35<1:38:46, 211.32it/s]

finished frames 2485800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 414434/1666666 [53:36<1:38:36, 211.64it/s]

finished frames 2486400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 414522/1666666 [53:36<1:38:34, 211.72it/s]

finished frames 2487000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 414632/1666666 [53:37<1:38:27, 211.93it/s]

finished frames 2487600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 414742/1666666 [53:37<1:38:39, 211.50it/s]

finished frames 2488200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 414830/1666666 [53:38<1:38:53, 210.98it/s]

finished frames 2488800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 414939/1666666 [53:38<1:38:49, 211.11it/s]

finished frames 2489400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 415027/1666666 [53:39<1:40:47, 206.96it/s]

finished frames 2490000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 415135/1666666 [53:39<1:39:05, 210.49it/s]

finished frames 2490600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 415222/1666666 [53:40<1:38:37, 211.49it/s]

finished frames 2491200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 415332/1666666 [53:40<1:38:57, 210.73it/s]

finished frames 2491800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 415441/1666666 [53:41<1:38:58, 210.70it/s]

finished frames 2492400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 415528/1666666 [53:41<1:39:14, 210.11it/s]

finished frames 2493000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 415637/1666666 [53:42<1:38:07, 212.50it/s]

finished frames 2493600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 415725/1666666 [53:42<1:37:40, 213.44it/s]

finished frames 2494200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 415835/1666666 [53:43<1:37:07, 214.64it/s]

finished frames 2494800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 415923/1666666 [53:43<1:41:33, 205.25it/s]

finished frames 2495400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 416030/1666666 [53:44<1:45:29, 197.60it/s]

finished frames 2496000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 416139/1666666 [53:44<1:38:22, 211.87it/s]

finished frames 2496600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 416227/1666666 [53:45<1:37:38, 213.45it/s]

finished frames 2497200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 416337/1666666 [53:45<1:37:20, 214.08it/s]

finished frames 2497800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 416425/1666666 [53:45<1:37:33, 213.60it/s]

finished frames 2498400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 416535/1666666 [53:46<1:37:19, 214.08it/s]

finished frames 2499000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▍       | 416623/1666666 [53:46<1:37:13, 214.28it/s]

finished frames 2499600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 416733/1666666 [53:47<1:37:27, 213.75it/s]

finished frames 2500200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 416843/1666666 [53:47<1:36:47, 215.22it/s]

finished frames 2500800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 416931/1666666 [53:48<1:36:39, 215.50it/s]

finished frames 2501400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 417041/1666666 [53:48<1:38:47, 210.83it/s]

finished frames 2502000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 417129/1666666 [53:49<1:37:16, 214.08it/s]

finished frames 2502600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 417239/1666666 [53:49<1:36:48, 215.11it/s]

finished frames 2503200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 417327/1666666 [53:50<1:37:12, 214.19it/s]

finished frames 2503800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 417437/1666666 [53:50<1:36:40, 215.36it/s]

finished frames 2504400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 417525/1666666 [53:51<1:36:40, 215.36it/s]

finished frames 2505000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 417635/1666666 [53:51<1:36:36, 215.49it/s]

finished frames 2505600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 417723/1666666 [53:52<1:36:44, 215.16it/s]

finished frames 2506200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 417833/1666666 [53:52<1:36:35, 215.48it/s]

finished frames 2506800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 417943/1666666 [53:53<1:36:40, 215.27it/s]

finished frames 2507400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 418031/1666666 [53:53<1:38:45, 210.71it/s]

finished frames 2508000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 418142/1666666 [53:53<1:36:07, 216.46it/s]

finished frames 2508600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 418232/1666666 [53:54<1:37:12, 214.04it/s]

finished frames 2509200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 418342/1666666 [53:54<1:37:56, 212.44it/s]

finished frames 2509800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 418428/1666666 [53:55<1:43:31, 200.97it/s]

finished frames 2510400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 418534/1666666 [53:55<1:43:39, 200.66it/s]

finished frames 2511000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 418639/1666666 [53:56<1:41:52, 204.19it/s]

finished frames 2511600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 418723/1666666 [53:56<1:42:17, 203.33it/s]

finished frames 2512200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 418830/1666666 [53:57<1:39:44, 208.52it/s]

finished frames 2512800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 418936/1666666 [53:57<1:39:29, 209.01it/s]

finished frames 2513400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 419020/1666666 [53:58<1:43:55, 200.09it/s]

finished frames 2514000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 419125/1666666 [53:58<1:40:29, 206.90it/s]

finished frames 2514600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 419231/1666666 [53:59<1:39:45, 208.42it/s]

finished frames 2515200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 419337/1666666 [53:59<1:39:35, 208.75it/s]

finished frames 2515800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 419442/1666666 [54:00<1:39:50, 208.19it/s]

finished frames 2516400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 419526/1666666 [54:00<1:40:05, 207.65it/s]

finished frames 2517000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 419633/1666666 [54:01<1:39:18, 209.27it/s]

finished frames 2517600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 419741/1666666 [54:01<1:38:34, 210.81it/s]

finished frames 2518200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 419829/1666666 [54:02<1:38:24, 211.16it/s]

finished frames 2518800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 419939/1666666 [54:02<1:38:04, 211.88it/s]

finished frames 2519400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 420027/1666666 [54:03<1:39:46, 208.25it/s]

finished frames 2520000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 420136/1666666 [54:03<1:37:52, 212.25it/s]

finished frames 2520600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 420224/1666666 [54:04<1:37:19, 213.46it/s]

finished frames 2521200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 420334/1666666 [54:04<1:37:20, 213.40it/s]

finished frames 2521800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 420444/1666666 [54:05<1:36:56, 214.24it/s]

finished frames 2522400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 420532/1666666 [54:05<1:37:26, 213.14it/s]

finished frames 2523000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 420620/1666666 [54:05<1:45:18, 197.22it/s]

finished frames 2523600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 420729/1666666 [54:06<1:38:28, 210.86it/s]

finished frames 2524200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 420839/1666666 [54:06<1:39:24, 208.86it/s]

finished frames 2524800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 420927/1666666 [54:07<1:37:41, 212.52it/s]

finished frames 2525400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 421037/1666666 [54:07<1:39:43, 208.19it/s]

finished frames 2526000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 421124/1666666 [54:08<1:37:42, 212.46it/s]

finished frames 2526600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 421234/1666666 [54:08<1:37:04, 213.82it/s]

finished frames 2527200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 421344/1666666 [54:09<1:37:04, 213.82it/s]

finished frames 2527800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 421432/1666666 [54:09<1:37:03, 213.81it/s]

finished frames 2528400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 421542/1666666 [54:10<1:37:09, 213.58it/s]

finished frames 2529000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 421630/1666666 [54:10<1:37:11, 213.49it/s]

finished frames 2529600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 421740/1666666 [54:11<1:37:09, 213.56it/s]

finished frames 2530200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 421828/1666666 [54:11<1:37:23, 213.01it/s]

finished frames 2530800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 421938/1666666 [54:12<1:37:05, 213.68it/s]

finished frames 2531400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 422026/1666666 [54:12<1:39:24, 208.67it/s]

finished frames 2532000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 422136/1666666 [54:13<1:37:32, 212.65it/s]

finished frames 2532600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 422224/1666666 [54:13<1:37:13, 213.35it/s]

finished frames 2533200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 422334/1666666 [54:13<1:37:13, 213.32it/s]

finished frames 2533800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 422444/1666666 [54:14<1:36:43, 214.38it/s]

finished frames 2534400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 422532/1666666 [54:14<1:36:28, 214.94it/s]

finished frames 2535000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 422642/1666666 [54:15<1:36:06, 215.75it/s]

finished frames 2535600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 422730/1666666 [54:15<1:36:02, 215.85it/s]

finished frames 2536200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 422840/1666666 [54:16<1:36:51, 214.03it/s]

finished frames 2536800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 422928/1666666 [54:16<1:37:47, 211.96it/s]

finished frames 2537400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 423016/1666666 [54:17<1:47:41, 192.48it/s]

finished frames 2538000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 423126/1666666 [54:17<1:46:52, 193.93it/s]

finished frames 2538600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 423235/1666666 [54:18<1:38:06, 211.22it/s]

finished frames 2539200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 423323/1666666 [54:18<1:36:37, 214.48it/s]

finished frames 2539800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 423433/1666666 [54:19<1:36:16, 215.24it/s]

finished frames 2540400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 423543/1666666 [54:19<1:35:47, 216.28it/s]

finished frames 2541000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 423631/1666666 [54:20<1:36:03, 215.68it/s]

finished frames 2541600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 423741/1666666 [54:20<1:36:00, 215.75it/s]

finished frames 2542200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 423829/1666666 [54:21<1:36:15, 215.21it/s]

finished frames 2542800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 423939/1666666 [54:21<1:36:50, 213.89it/s]

finished frames 2543400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 424027/1666666 [54:21<1:38:43, 209.78it/s]

finished frames 2544000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 424137/1666666 [54:22<1:36:37, 214.34it/s]

finished frames 2544600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 424225/1666666 [54:22<1:36:27, 214.68it/s]

finished frames 2545200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 424335/1666666 [54:23<1:36:06, 215.44it/s]

finished frames 2545800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 424423/1666666 [54:23<1:36:18, 215.00it/s]

finished frames 2546400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 424533/1666666 [54:24<1:36:11, 215.23it/s]

finished frames 2547000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 424643/1666666 [54:24<1:36:10, 215.23it/s]

finished frames 2547600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 424731/1666666 [54:25<1:36:07, 215.33it/s]

finished frames 2548200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 424841/1666666 [54:25<1:36:08, 215.28it/s]

finished frames 2548800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 25%|██▌       | 424929/1666666 [54:26<1:36:25, 214.64it/s]

finished frames 2549400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 425039/1666666 [54:26<1:38:18, 210.51it/s]

finished frames 2550000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 425127/1666666 [54:27<1:37:02, 213.25it/s]

finished frames 2550600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 425237/1666666 [54:27<1:36:36, 214.17it/s]

finished frames 2551200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 425325/1666666 [54:28<1:36:27, 214.48it/s]

finished frames 2551800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 425435/1666666 [54:28<1:42:07, 202.57it/s]

finished frames 2552400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 425523/1666666 [54:28<1:37:18, 212.57it/s]

finished frames 2553000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 425633/1666666 [54:29<1:36:06, 215.21it/s]

finished frames 2553600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 425743/1666666 [54:30<1:36:04, 215.27it/s]

finished frames 2554200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 425831/1666666 [54:30<1:36:13, 214.90it/s]

finished frames 2554800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 425941/1666666 [54:30<1:36:02, 215.31it/s]

finished frames 2555400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 426029/1666666 [54:31<1:38:13, 210.52it/s]

finished frames 2556000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 426139/1666666 [54:31<1:36:25, 214.41it/s]

finished frames 2556600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 426227/1666666 [54:32<1:36:16, 214.75it/s]

finished frames 2557200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 426337/1666666 [54:32<1:36:10, 214.93it/s]

finished frames 2557800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 426425/1666666 [54:33<1:36:28, 214.28it/s]

finished frames 2558400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 426535/1666666 [54:33<1:36:19, 214.58it/s]

finished frames 2559000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 426623/1666666 [54:34<1:36:16, 214.67it/s]

finished frames 2559600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 426733/1666666 [54:34<1:36:20, 214.50it/s]

finished frames 2560200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 426843/1666666 [54:35<1:36:20, 214.50it/s]

finished frames 2560800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 426931/1666666 [54:35<1:36:26, 214.23it/s]

finished frames 2561400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 427040/1666666 [54:36<1:39:37, 207.39it/s]

finished frames 2562000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 427127/1666666 [54:36<1:37:13, 212.48it/s]

finished frames 2562600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 427237/1666666 [54:37<1:36:25, 214.24it/s]

finished frames 2563200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 427325/1666666 [54:37<1:36:21, 214.35it/s]

finished frames 2563800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 427435/1666666 [54:37<1:36:17, 214.49it/s]

finished frames 2564400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 427523/1666666 [54:38<1:36:18, 214.42it/s]

finished frames 2565000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 427633/1666666 [54:38<1:36:09, 214.77it/s]

finished frames 2565600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 427743/1666666 [54:39<1:36:14, 214.55it/s]

finished frames 2566200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 427831/1666666 [54:39<1:36:11, 214.65it/s]

finished frames 2566800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 427941/1666666 [54:40<1:35:57, 215.16it/s]

finished frames 2567400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 428029/1666666 [54:40<1:38:25, 209.75it/s]

finished frames 2568000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 428139/1666666 [54:41<1:36:38, 213.60it/s]

finished frames 2568600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 428227/1666666 [54:41<1:36:30, 213.88it/s]

finished frames 2569200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 428337/1666666 [54:42<1:36:16, 214.39it/s]

finished frames 2569800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 428425/1666666 [54:42<1:37:44, 211.16it/s]

finished frames 2570400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 428535/1666666 [54:43<1:38:12, 210.13it/s]

finished frames 2571000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 428643/1666666 [54:43<1:38:32, 209.40it/s]

finished frames 2571600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 428728/1666666 [54:44<1:38:33, 209.33it/s]

finished frames 2572200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 428836/1666666 [54:44<1:38:16, 209.94it/s]

finished frames 2572800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 428923/1666666 [54:44<1:38:20, 209.76it/s]

finished frames 2573400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 429028/1666666 [54:45<1:40:46, 204.70it/s]

finished frames 2574000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 429135/1666666 [54:46<1:38:38, 209.09it/s]

finished frames 2574600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 429243/1666666 [54:46<1:38:14, 209.92it/s]

finished frames 2575200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 429328/1666666 [54:46<1:38:20, 209.70it/s]

finished frames 2575800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 429433/1666666 [54:47<1:41:16, 203.62it/s]

finished frames 2576400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 429538/1666666 [54:47<1:39:00, 208.26it/s]

finished frames 2577000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 429624/1666666 [54:48<1:38:49, 208.63it/s]

finished frames 2577600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 429729/1666666 [54:48<1:40:07, 205.89it/s]

finished frames 2578200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 429839/1666666 [54:49<1:36:51, 212.82it/s]

finished frames 2578800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 429927/1666666 [54:49<1:36:03, 214.59it/s]

finished frames 2579400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 430037/1666666 [54:50<1:37:14, 211.95it/s]

finished frames 2580000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 430125/1666666 [54:50<1:36:04, 214.52it/s]

finished frames 2580600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 430235/1666666 [54:51<1:40:45, 204.52it/s]

finished frames 2581200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 430323/1666666 [54:51<1:38:42, 208.75it/s]

finished frames 2581800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 430433/1666666 [54:52<1:36:21, 213.81it/s]

finished frames 2582400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 430543/1666666 [54:52<1:35:57, 214.68it/s]

finished frames 2583000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 430631/1666666 [54:53<1:35:49, 214.99it/s]

finished frames 2583600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 430741/1666666 [54:53<1:36:11, 214.14it/s]

finished frames 2584200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 430829/1666666 [54:54<1:36:07, 214.28it/s]

finished frames 2584800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 430939/1666666 [54:54<1:36:00, 214.53it/s]

finished frames 2585400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 431027/1666666 [54:55<1:37:57, 210.24it/s]

finished frames 2586000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 431137/1666666 [54:55<1:37:17, 211.67it/s]

finished frames 2586600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 431225/1666666 [54:55<1:37:17, 211.64it/s]

finished frames 2587200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 431335/1666666 [54:56<1:37:12, 211.81it/s]

finished frames 2587800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 431423/1666666 [54:56<1:37:03, 212.11it/s]

finished frames 2588400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 431533/1666666 [54:57<1:37:06, 211.98it/s]

finished frames 2589000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 431643/1666666 [54:57<1:36:58, 212.24it/s]

finished frames 2589600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 431731/1666666 [54:58<1:37:10, 211.82it/s]

finished frames 2590200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 431841/1666666 [54:58<1:37:13, 211.67it/s]

finished frames 2590800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 431929/1666666 [54:59<1:37:10, 211.75it/s]

finished frames 2591400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 432039/1666666 [54:59<1:39:51, 206.06it/s]

finished frames 2592000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 432126/1666666 [55:00<1:37:59, 209.99it/s]

finished frames 2592600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 432236/1666666 [55:00<1:37:17, 211.46it/s]

finished frames 2593200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 432324/1666666 [55:01<1:37:15, 211.53it/s]

finished frames 2593800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 432434/1666666 [55:01<1:37:24, 211.16it/s]

finished frames 2594400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 432522/1666666 [55:02<1:37:15, 211.50it/s]

finished frames 2595000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 432631/1666666 [55:02<1:43:34, 198.58it/s]

finished frames 2595600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 432738/1666666 [55:03<1:42:02, 201.54it/s]

finished frames 2596200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 432823/1666666 [55:03<1:39:17, 207.11it/s]

finished frames 2596800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 432928/1666666 [55:04<1:39:18, 207.05it/s]

finished frames 2597400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 433033/1666666 [55:04<1:41:36, 202.35it/s]

finished frames 2598000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 433138/1666666 [55:05<1:39:46, 206.05it/s]

finished frames 2598600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 433222/1666666 [55:05<1:39:47, 206.02it/s]

finished frames 2599200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 433327/1666666 [55:06<1:39:37, 206.32it/s]

finished frames 2599800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 433432/1666666 [55:06<1:39:28, 206.63it/s]

finished frames 2600400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 433537/1666666 [55:07<1:39:19, 206.90it/s]

finished frames 2601000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 433642/1666666 [55:07<1:39:14, 207.07it/s]

finished frames 2601600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 433726/1666666 [55:08<1:39:31, 206.47it/s]

finished frames 2602200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 433831/1666666 [55:08<1:39:21, 206.79it/s]

finished frames 2602800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 433936/1666666 [55:09<1:39:21, 206.78it/s]

finished frames 2603400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 434020/1666666 [55:09<1:42:49, 199.79it/s]

finished frames 2604000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 434125/1666666 [55:09<1:40:38, 204.12it/s]

finished frames 2604600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 434230/1666666 [55:10<1:39:58, 205.46it/s]

finished frames 2605200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 434335/1666666 [55:11<1:39:47, 205.83it/s]

finished frames 2605800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 434440/1666666 [55:11<1:39:41, 206.01it/s]

finished frames 2606400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 434524/1666666 [55:11<1:39:51, 205.66it/s]

finished frames 2607000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 434629/1666666 [55:12<1:39:45, 205.84it/s]

finished frames 2607600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 434734/1666666 [55:12<1:39:45, 205.83it/s]

finished frames 2608200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 434839/1666666 [55:13<1:39:39, 206.02it/s]

finished frames 2608800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 434923/1666666 [55:13<1:42:34, 200.12it/s]

finished frames 2609400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 435026/1666666 [55:14<1:46:55, 191.98it/s]

finished frames 2610000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 435131/1666666 [55:14<1:41:00, 203.19it/s]

finished frames 2610600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 435236/1666666 [55:15<1:39:53, 205.46it/s]

finished frames 2611200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 435341/1666666 [55:15<1:39:35, 206.05it/s]

finished frames 2611800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 435425/1666666 [55:16<1:40:03, 205.09it/s]

finished frames 2612400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 435530/1666666 [55:16<1:39:26, 206.35it/s]

finished frames 2613000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 435635/1666666 [55:17<1:39:29, 206.23it/s]

finished frames 2613600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 435740/1666666 [55:17<1:39:20, 206.53it/s]

finished frames 2614200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 435824/1666666 [55:18<1:39:57, 205.22it/s]

finished frames 2614800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 435929/1666666 [55:18<1:39:35, 205.98it/s]

finished frames 2615400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 436034/1666666 [55:19<1:41:52, 201.33it/s]

finished frames 2616000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 436139/1666666 [55:19<1:39:38, 205.83it/s]

finished frames 2616600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 436223/1666666 [55:20<1:39:36, 205.88it/s]

finished frames 2617200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 436328/1666666 [55:20<1:39:26, 206.20it/s]

finished frames 2617800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 436433/1666666 [55:21<1:39:32, 205.97it/s]

finished frames 2618400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 436538/1666666 [55:21<1:39:22, 206.30it/s]

finished frames 2619000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 436622/1666666 [55:22<1:39:38, 205.76it/s]

finished frames 2619600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 436727/1666666 [55:22<1:39:09, 206.73it/s]

finished frames 2620200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 436832/1666666 [55:23<1:39:22, 206.28it/s]

finished frames 2620800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 436937/1666666 [55:23<1:39:28, 206.04it/s]

finished frames 2621400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 437021/1666666 [55:24<1:42:47, 199.39it/s]

finished frames 2622000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 437127/1666666 [55:24<1:38:43, 207.55it/s]

finished frames 2622600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 437232/1666666 [55:25<1:38:17, 208.45it/s]

finished frames 2623200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 437337/1666666 [55:25<1:38:32, 207.93it/s]

finished frames 2623800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▌       | 437442/1666666 [55:26<1:38:26, 208.12it/s]

finished frames 2624400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▋       | 437526/1666666 [55:26<1:38:28, 208.02it/s]

finished frames 2625000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▋       | 437631/1666666 [55:27<1:38:10, 208.66it/s]

finished frames 2625600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▋       | 437736/1666666 [55:27<1:38:07, 208.74it/s]

finished frames 2626200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▋       | 437841/1666666 [55:28<1:38:10, 208.62it/s]

finished frames 2626800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▋       | 437925/1666666 [55:28<1:38:18, 208.31it/s]

finished frames 2627400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▋       | 438030/1666666 [55:29<1:40:39, 203.42it/s]

finished frames 2628000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▋       | 438135/1666666 [55:29<1:38:27, 207.96it/s]

finished frames 2628600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▋       | 438240/1666666 [55:30<1:38:16, 208.33it/s]

finished frames 2629200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▋       | 438324/1666666 [55:30<1:38:35, 207.65it/s]

finished frames 2629800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▋       | 438430/1666666 [55:30<1:38:00, 208.86it/s]

finished frames 2630400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▋       | 438535/1666666 [55:31<1:38:01, 208.80it/s]

finished frames 2631000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▋       | 438640/1666666 [55:31<1:38:05, 208.65it/s]

finished frames 2631600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▋       | 438724/1666666 [55:32<1:38:04, 208.67it/s]

finished frames 2632200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▋       | 438829/1666666 [55:32<1:38:02, 208.73it/s]

finished frames 2632800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▋       | 438934/1666666 [55:33<1:38:01, 208.76it/s]

finished frames 2633400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▋       | 439039/1666666 [55:33<1:39:59, 204.61it/s]

finished frames 2634000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▋       | 439125/1666666 [55:34<1:38:04, 208.61it/s]

finished frames 2634600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▋       | 439234/1666666 [55:34<1:36:36, 211.75it/s]

finished frames 2635200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▋       | 439322/1666666 [55:35<1:36:23, 212.22it/s]

finished frames 2635800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▋       | 439432/1666666 [55:35<1:36:05, 212.87it/s]

finished frames 2636400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▋       | 439520/1666666 [55:36<1:43:53, 196.88it/s]

finished frames 2637000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▋       | 439628/1666666 [55:36<1:46:02, 192.85it/s]

finished frames 2637600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▋       | 439737/1666666 [55:37<1:37:45, 209.18it/s]

finished frames 2638200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▋       | 439824/1666666 [55:37<1:36:32, 211.79it/s]

finished frames 2638800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▋       | 439934/1666666 [55:38<1:36:03, 212.85it/s]

finished frames 2639400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▋       | 440022/1666666 [55:38<1:39:09, 206.18it/s]

finished frames 2640000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▋       | 440131/1666666 [55:39<1:36:37, 211.58it/s]

finished frames 2640600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▋       | 440241/1666666 [55:39<1:35:28, 214.11it/s]

finished frames 2641200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▋       | 440329/1666666 [55:40<1:35:04, 214.99it/s]

finished frames 2641800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▋       | 440439/1666666 [55:40<1:35:03, 215.00it/s]

finished frames 2642400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▋       | 440527/1666666 [55:40<1:34:54, 215.31it/s]

finished frames 2643000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▋       | 440637/1666666 [55:41<1:34:44, 215.70it/s]

finished frames 2643600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▋       | 440725/1666666 [55:41<1:35:06, 214.84it/s]

finished frames 2644200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▋       | 440835/1666666 [55:42<1:35:20, 214.27it/s]

finished frames 2644800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▋       | 440923/1666666 [55:42<1:35:18, 214.36it/s]

finished frames 2645400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▋       | 441033/1666666 [55:43<1:37:52, 208.71it/s]

finished frames 2646000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▋       | 441142/1666666 [55:43<1:35:37, 213.60it/s]

finished frames 2646600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▋       | 441230/1666666 [55:44<1:35:19, 214.25it/s]

finished frames 2647200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▋       | 441340/1666666 [55:44<1:35:10, 214.59it/s]

finished frames 2647800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▋       | 441428/1666666 [55:45<1:35:11, 214.53it/s]

finished frames 2648400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▋       | 441538/1666666 [55:45<1:34:53, 215.18it/s]

finished frames 2649000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 26%|██▋       | 441626/1666666 [55:46<1:34:56, 215.04it/s]

finished frames 2649600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 441736/1666666 [55:46<1:34:38, 215.72it/s]

finished frames 2650200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 441824/1666666 [55:47<1:35:17, 214.24it/s]

finished frames 2650800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 441934/1666666 [55:47<1:35:48, 213.05it/s]

finished frames 2651400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 442022/1666666 [55:48<1:38:23, 207.45it/s]

finished frames 2652000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 442132/1666666 [55:48<1:35:01, 214.78it/s]

finished frames 2652600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 442242/1666666 [55:49<1:34:24, 216.18it/s]

finished frames 2653200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 442330/1666666 [55:49<1:34:04, 216.91it/s]

finished frames 2653800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 442440/1666666 [55:49<1:33:59, 217.07it/s]

finished frames 2654400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 442528/1666666 [55:50<1:34:07, 216.76it/s]

finished frames 2655000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 442638/1666666 [55:50<1:34:01, 216.99it/s]

finished frames 2655600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 442726/1666666 [55:51<1:34:07, 216.73it/s]

finished frames 2656200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 442836/1666666 [55:51<1:33:55, 217.17it/s]

finished frames 2656800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 442924/1666666 [55:52<1:34:01, 216.94it/s]

finished frames 2657400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 443034/1666666 [55:52<1:36:28, 211.38it/s]

finished frames 2658000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 443144/1666666 [55:53<1:34:35, 215.57it/s]

finished frames 2658600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 443232/1666666 [55:53<1:34:49, 215.02it/s]

finished frames 2659200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 443323/1666666 [55:54<1:32:35, 220.22it/s]

finished frames 2659800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 443437/1666666 [55:54<1:33:15, 218.62it/s]

finished frames 2660400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 443527/1666666 [55:54<1:34:29, 215.75it/s]

finished frames 2661000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 443637/1666666 [55:55<1:36:31, 211.16it/s]

finished frames 2661600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 443725/1666666 [55:55<1:35:00, 214.53it/s]

finished frames 2662200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 443832/1666666 [55:56<1:37:57, 208.06it/s]

finished frames 2662800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 443937/1666666 [55:56<1:39:08, 205.56it/s]

finished frames 2663400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 444023/1666666 [55:57<1:38:43, 206.40it/s]

finished frames 2664000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 444132/1666666 [55:57<1:35:51, 212.54it/s]

finished frames 2664600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 444241/1666666 [55:58<1:37:07, 209.78it/s]

finished frames 2665200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 444326/1666666 [55:58<1:43:05, 197.60it/s]

finished frames 2665800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 444433/1666666 [55:59<1:46:31, 191.22it/s]

finished frames 2666400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 444542/1666666 [55:59<1:38:23, 207.01it/s]

finished frames 2667000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 444629/1666666 [56:00<1:37:13, 209.49it/s]

finished frames 2667600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 444737/1666666 [56:00<1:36:53, 210.18it/s]

finished frames 2668200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 444825/1666666 [56:01<1:36:57, 210.04it/s]

finished frames 2668800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 444934/1666666 [56:01<1:36:55, 210.08it/s]

finished frames 2669400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 445021/1666666 [56:02<1:40:02, 203.53it/s]

finished frames 2670000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 445129/1666666 [56:02<1:37:03, 209.76it/s]

finished frames 2670600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 445237/1666666 [56:03<1:36:37, 210.68it/s]

finished frames 2671200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 445325/1666666 [56:03<1:36:24, 211.13it/s]

finished frames 2671800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 445435/1666666 [56:04<1:36:33, 210.79it/s]

finished frames 2672400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 445523/1666666 [56:04<1:36:36, 210.67it/s]

finished frames 2673000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 445633/1666666 [56:05<1:36:19, 211.26it/s]

finished frames 2673600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 445743/1666666 [56:05<1:36:24, 211.07it/s]

finished frames 2674200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 445830/1666666 [56:06<1:36:41, 210.45it/s]

finished frames 2674800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 445940/1666666 [56:06<1:35:29, 213.07it/s]

finished frames 2675400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 446028/1666666 [56:06<1:37:05, 209.53it/s]

finished frames 2676000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 446138/1666666 [56:07<1:35:03, 213.98it/s]

finished frames 2676600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 446226/1666666 [56:07<1:34:55, 214.26it/s]

finished frames 2677200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 446336/1666666 [56:08<1:35:00, 214.08it/s]

finished frames 2677800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 446424/1666666 [56:08<1:35:03, 213.95it/s]

finished frames 2678400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 446534/1666666 [56:09<1:34:56, 214.18it/s]

finished frames 2679000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 446644/1666666 [56:09<1:34:53, 214.27it/s]

finished frames 2679600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 446732/1666666 [56:10<1:39:53, 203.53it/s]

finished frames 2680200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 446842/1666666 [56:10<1:35:02, 213.92it/s]

finished frames 2680800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 446930/1666666 [56:11<1:34:34, 214.94it/s]

finished frames 2681400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 447040/1666666 [56:11<1:36:14, 211.20it/s]

finished frames 2682000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 447128/1666666 [56:12<1:34:54, 214.18it/s]

finished frames 2682600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 447238/1666666 [56:12<1:34:24, 215.27it/s]

finished frames 2683200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 447326/1666666 [56:13<1:34:14, 215.63it/s]

finished frames 2683800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 447436/1666666 [56:13<1:34:32, 214.92it/s]

finished frames 2684400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 447524/1666666 [56:14<1:34:34, 214.84it/s]

finished frames 2685000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 447634/1666666 [56:14<1:34:27, 215.10it/s]

finished frames 2685600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 447744/1666666 [56:15<1:34:47, 214.32it/s]

finished frames 2686200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 447832/1666666 [56:15<1:34:29, 214.97it/s]

finished frames 2686800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 447942/1666666 [56:15<1:34:24, 215.17it/s]

finished frames 2687400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 448030/1666666 [56:16<1:36:40, 210.11it/s]

finished frames 2688000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 448140/1666666 [56:16<1:34:38, 214.59it/s]

finished frames 2688600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 448228/1666666 [56:17<1:34:15, 215.43it/s]

finished frames 2689200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 448338/1666666 [56:17<1:34:02, 215.92it/s]

finished frames 2689800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 448426/1666666 [56:18<1:34:16, 215.39it/s]

finished frames 2690400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 448536/1666666 [56:18<1:34:12, 215.50it/s]

finished frames 2691000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 448624/1666666 [56:19<1:34:06, 215.70it/s]

finished frames 2691600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 448734/1666666 [56:19<1:34:20, 215.18it/s]

finished frames 2692200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 448844/1666666 [56:20<1:34:06, 215.70it/s]

finished frames 2692800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 448932/1666666 [56:20<1:34:08, 215.59it/s]

finished frames 2693400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 449042/1666666 [56:21<1:36:25, 210.45it/s]

finished frames 2694000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 449130/1666666 [56:21<1:38:23, 206.23it/s]

finished frames 2694600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 449240/1666666 [56:22<1:39:52, 203.15it/s]

finished frames 2695200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 449327/1666666 [56:22<1:35:35, 212.25it/s]

finished frames 2695800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 449437/1666666 [56:23<1:34:24, 214.89it/s]

finished frames 2696400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 449525/1666666 [56:23<1:34:13, 215.29it/s]

finished frames 2697000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 449635/1666666 [56:23<1:34:01, 215.72it/s]

finished frames 2697600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 449723/1666666 [56:24<1:33:56, 215.89it/s]

finished frames 2698200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 449833/1666666 [56:24<1:33:41, 216.44it/s]

finished frames 2698800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 449943/1666666 [56:25<1:33:29, 216.92it/s]

finished frames 2699400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 450031/1666666 [56:25<1:35:56, 211.34it/s]

finished frames 2700000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 450141/1666666 [56:26<1:33:58, 215.76it/s]

finished frames 2700600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 450229/1666666 [56:26<1:33:28, 216.89it/s]

finished frames 2701200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 450339/1666666 [56:27<1:33:14, 217.42it/s]

finished frames 2701800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 450427/1666666 [56:27<1:33:24, 217.02it/s]

finished frames 2702400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 450537/1666666 [56:28<1:33:13, 217.41it/s]

finished frames 2703000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 450625/1666666 [56:28<1:33:21, 217.10it/s]

finished frames 2703600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 450735/1666666 [56:29<1:33:22, 217.05it/s]

finished frames 2704200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 450823/1666666 [56:29<1:33:29, 216.76it/s]

finished frames 2704800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 450933/1666666 [56:29<1:33:29, 216.73it/s]

finished frames 2705400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 451021/1666666 [56:30<1:36:28, 210.01it/s]

finished frames 2706000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 451131/1666666 [56:30<1:33:38, 216.33it/s]

finished frames 2706600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 451241/1666666 [56:31<1:33:31, 216.60it/s]

finished frames 2707200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 451329/1666666 [56:31<1:33:20, 217.00it/s]

finished frames 2707800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 451440/1666666 [56:32<1:33:08, 217.46it/s]

finished frames 2708400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 451528/1666666 [56:32<1:41:11, 200.12it/s]

finished frames 2709000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 451638/1666666 [56:33<1:34:34, 214.13it/s]

finished frames 2709600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 451727/1666666 [56:33<1:33:27, 216.65it/s]

finished frames 2710200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 451837/1666666 [56:34<1:33:27, 216.64it/s]

finished frames 2710800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 451925/1666666 [56:34<1:33:17, 217.02it/s]

finished frames 2711400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 452035/1666666 [56:35<1:35:40, 211.57it/s]

finished frames 2712000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 452123/1666666 [56:35<1:33:50, 215.70it/s]

finished frames 2712600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 452234/1666666 [56:36<1:33:08, 217.30it/s]

finished frames 2713200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 452344/1666666 [56:36<1:34:00, 215.28it/s]

finished frames 2713800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 452432/1666666 [56:36<1:34:02, 215.21it/s]

finished frames 2714400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 452542/1666666 [56:37<1:34:05, 215.05it/s]

finished frames 2715000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 452630/1666666 [56:37<1:34:27, 214.22it/s]

finished frames 2715600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 452740/1666666 [56:38<1:33:54, 215.45it/s]

finished frames 2716200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 452828/1666666 [56:38<1:33:46, 215.73it/s]

finished frames 2716800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 452938/1666666 [56:39<1:33:48, 215.64it/s]

finished frames 2717400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 453026/1666666 [56:39<1:36:17, 210.06it/s]

finished frames 2718000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 453136/1666666 [56:40<1:34:47, 213.38it/s]

finished frames 2718600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 453224/1666666 [56:40<1:34:31, 213.96it/s]

finished frames 2719200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 453334/1666666 [56:41<1:34:15, 214.53it/s]

finished frames 2719800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 453444/1666666 [56:41<1:33:50, 215.46it/s]

finished frames 2720400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 453532/1666666 [56:42<1:34:06, 214.86it/s]

finished frames 2721000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 453642/1666666 [56:42<1:34:05, 214.85it/s]

finished frames 2721600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 453730/1666666 [56:43<1:34:00, 215.06it/s]

finished frames 2722200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 453840/1666666 [56:43<1:33:54, 215.25it/s]

finished frames 2722800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 453928/1666666 [56:43<1:35:52, 210.83it/s]

finished frames 2723400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 454038/1666666 [56:44<1:38:17, 205.63it/s]

finished frames 2724000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 454125/1666666 [56:44<1:35:18, 212.03it/s]

finished frames 2724600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 454235/1666666 [56:45<1:34:31, 213.78it/s]

finished frames 2725200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 454323/1666666 [56:45<1:34:33, 213.69it/s]

finished frames 2725800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 454433/1666666 [56:46<1:33:53, 215.18it/s]

finished frames 2726400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 454543/1666666 [56:46<1:33:51, 215.25it/s]

finished frames 2727000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 454631/1666666 [56:47<1:34:26, 213.91it/s]

finished frames 2727600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 454741/1666666 [56:47<1:34:21, 214.07it/s]

finished frames 2728200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 454829/1666666 [56:48<1:34:21, 214.05it/s]

finished frames 2728800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 454939/1666666 [56:48<1:34:07, 214.57it/s]

finished frames 2729400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 455027/1666666 [56:49<1:36:13, 209.88it/s]

finished frames 2730000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 455137/1666666 [56:49<1:35:03, 212.42it/s]

finished frames 2730600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 455225/1666666 [56:50<1:34:06, 214.55it/s]

finished frames 2731200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 455335/1666666 [56:50<1:33:28, 215.97it/s]

finished frames 2731800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 455423/1666666 [56:50<1:33:38, 215.56it/s]

finished frames 2732400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 455533/1666666 [56:51<1:33:28, 215.93it/s]

finished frames 2733000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 455643/1666666 [56:52<1:33:13, 216.51it/s]

finished frames 2733600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 455731/1666666 [56:52<1:33:22, 216.15it/s]

finished frames 2734200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 455841/1666666 [56:52<1:33:21, 216.16it/s]

finished frames 2734800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 455929/1666666 [56:53<1:33:15, 216.36it/s]

finished frames 2735400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 456039/1666666 [56:53<1:35:37, 211.01it/s]

finished frames 2736000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 456127/1666666 [56:54<1:33:50, 214.98it/s]

finished frames 2736600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 456237/1666666 [56:54<1:33:09, 216.54it/s]

finished frames 2737200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 456325/1666666 [56:55<1:44:29, 193.06it/s]

finished frames 2737800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 456435/1666666 [56:55<1:35:12, 211.85it/s]

finished frames 2738400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 456523/1666666 [56:56<1:33:45, 215.11it/s]

finished frames 2739000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 456633/1666666 [56:56<1:33:15, 216.26it/s]

finished frames 2739600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 456743/1666666 [56:57<1:32:47, 217.31it/s]

finished frames 2740200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 456831/1666666 [56:57<1:33:01, 216.76it/s]

finished frames 2740800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 456941/1666666 [56:58<1:33:14, 216.25it/s]

finished frames 2741400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 457029/1666666 [56:58<1:35:22, 211.37it/s]

finished frames 2742000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 457139/1666666 [56:59<1:33:42, 215.12it/s]

finished frames 2742600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 457227/1666666 [56:59<1:33:19, 216.00it/s]

finished frames 2743200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 457337/1666666 [56:59<1:33:11, 216.29it/s]

finished frames 2743800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 457425/1666666 [57:00<1:33:08, 216.36it/s]

finished frames 2744400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 457535/1666666 [57:00<1:33:16, 216.05it/s]

finished frames 2745000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 457623/1666666 [57:01<1:33:03, 216.52it/s]

finished frames 2745600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 457733/1666666 [57:01<1:33:03, 216.50it/s]

finished frames 2746200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 457843/1666666 [57:02<1:33:02, 216.54it/s]

finished frames 2746800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 457931/1666666 [57:02<1:33:06, 216.38it/s]

finished frames 2747400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 458041/1666666 [57:03<1:35:08, 211.73it/s]

finished frames 2748000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 458129/1666666 [57:03<1:33:38, 215.08it/s]

finished frames 2748600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 458239/1666666 [57:04<1:33:07, 216.27it/s]

finished frames 2749200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 27%|██▋       | 458328/1666666 [57:04<1:32:26, 217.86it/s]

finished frames 2749800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 458438/1666666 [57:05<1:32:38, 217.38it/s]

finished frames 2750400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 458527/1666666 [57:05<1:32:19, 218.11it/s]

finished frames 2751000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 458637/1666666 [57:05<1:32:37, 217.36it/s]

finished frames 2751600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 458725/1666666 [57:06<1:40:40, 199.97it/s]

finished frames 2752200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 458835/1666666 [57:06<1:34:07, 213.88it/s]

finished frames 2752800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 458923/1666666 [57:07<1:36:18, 209.01it/s]

finished frames 2753400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 459033/1666666 [57:07<1:35:36, 210.51it/s]

finished frames 2754000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 459143/1666666 [57:08<1:33:12, 215.91it/s]

finished frames 2754600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 459231/1666666 [57:08<1:33:06, 216.13it/s]

finished frames 2755200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 459342/1666666 [57:09<1:32:26, 217.68it/s]

finished frames 2755800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 459430/1666666 [57:09<1:32:43, 216.97it/s]

finished frames 2756400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 459540/1666666 [57:10<1:32:45, 216.89it/s]

finished frames 2757000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 459629/1666666 [57:10<1:32:26, 217.61it/s]

finished frames 2757600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 459739/1666666 [57:11<1:32:13, 218.13it/s]

finished frames 2758200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 459827/1666666 [57:11<1:32:23, 217.70it/s]

finished frames 2758800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 459937/1666666 [57:12<1:32:51, 216.60it/s]

finished frames 2759400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 460025/1666666 [57:12<1:35:54, 209.69it/s]

finished frames 2760000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 460132/1666666 [57:12<1:36:12, 209.01it/s]

finished frames 2760600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 460240/1666666 [57:13<1:35:45, 209.99it/s]

finished frames 2761200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 460327/1666666 [57:13<1:35:33, 210.40it/s]

finished frames 2761800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 460437/1666666 [57:14<1:35:29, 210.51it/s]

finished frames 2762400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 460524/1666666 [57:14<1:35:36, 210.27it/s]

finished frames 2763000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 460634/1666666 [57:15<1:35:32, 210.38it/s]

finished frames 2763600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 460740/1666666 [57:15<1:36:20, 208.62it/s]

finished frames 2764200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 460824/1666666 [57:16<1:36:46, 207.67it/s]

finished frames 2764800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 460929/1666666 [57:16<1:37:00, 207.17it/s]

finished frames 2765400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 461034/1666666 [57:17<1:38:54, 203.16it/s]

finished frames 2766000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 461139/1666666 [57:17<1:37:32, 205.97it/s]

finished frames 2766600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 461223/1666666 [57:18<1:45:13, 190.94it/s]

finished frames 2767200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 461328/1666666 [57:18<1:38:06, 204.78it/s]

finished frames 2767800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 461433/1666666 [57:19<1:36:44, 207.62it/s]

finished frames 2768400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 461538/1666666 [57:19<1:36:17, 208.58it/s]

finished frames 2769000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 461622/1666666 [57:20<1:36:40, 207.76it/s]

finished frames 2769600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 461727/1666666 [57:20<1:36:46, 207.51it/s]

finished frames 2770200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 461832/1666666 [57:21<1:36:25, 208.26it/s]

finished frames 2770800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 461937/1666666 [57:21<1:36:25, 208.24it/s]

finished frames 2771400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 462022/1666666 [57:22<1:38:00, 204.86it/s]

finished frames 2772000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 462131/1666666 [57:22<1:35:17, 210.68it/s]

finished frames 2772600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 462241/1666666 [57:23<1:34:19, 212.82it/s]

finished frames 2773200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 462329/1666666 [57:23<1:34:09, 213.17it/s]

finished frames 2773800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 462439/1666666 [57:24<1:33:56, 213.66it/s]

finished frames 2774400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 462527/1666666 [57:24<1:34:06, 213.26it/s]

finished frames 2775000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 462637/1666666 [57:25<1:34:01, 213.44it/s]

finished frames 2775600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 462725/1666666 [57:25<1:34:10, 213.08it/s]

finished frames 2776200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 462835/1666666 [57:25<1:34:11, 213.01it/s]

finished frames 2776800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 462923/1666666 [57:26<1:34:38, 211.99it/s]

finished frames 2777400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 463033/1666666 [57:26<1:36:31, 207.83it/s]

finished frames 2778000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 463142/1666666 [57:27<1:34:07, 213.10it/s]

finished frames 2778600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 463230/1666666 [57:27<1:33:57, 213.47it/s]

finished frames 2779200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 463340/1666666 [57:28<1:33:59, 213.36it/s]

finished frames 2779800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 463428/1666666 [57:28<1:33:55, 213.50it/s]

finished frames 2780400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 463538/1666666 [57:29<1:39:26, 201.65it/s]

finished frames 2781000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 463624/1666666 [57:29<1:39:34, 201.35it/s]

finished frames 2781600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 463733/1666666 [57:30<1:34:46, 211.55it/s]

finished frames 2782200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 463843/1666666 [57:30<1:33:42, 213.92it/s]

finished frames 2782800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 463931/1666666 [57:31<1:33:58, 213.31it/s]

finished frames 2783400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 464041/1666666 [57:31<1:36:17, 208.16it/s]

finished frames 2784000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 464128/1666666 [57:32<1:34:39, 211.74it/s]

finished frames 2784600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 464238/1666666 [57:32<1:34:41, 211.65it/s]

finished frames 2785200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 464326/1666666 [57:33<1:34:17, 212.52it/s]

finished frames 2785800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 464436/1666666 [57:33<1:34:01, 213.11it/s]

finished frames 2786400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 464524/1666666 [57:34<1:33:53, 213.39it/s]

finished frames 2787000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 464634/1666666 [57:34<1:34:06, 212.88it/s]

finished frames 2787600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 464744/1666666 [57:35<1:33:50, 213.47it/s]

finished frames 2788200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 464832/1666666 [57:35<1:33:47, 213.55it/s]

finished frames 2788800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 464942/1666666 [57:35<1:33:51, 213.40it/s]

finished frames 2789400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 465030/1666666 [57:36<1:35:08, 210.52it/s]

finished frames 2790000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 465140/1666666 [57:36<1:33:05, 215.13it/s]

finished frames 2790600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 465228/1666666 [57:37<1:32:36, 216.21it/s]

finished frames 2791200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 465338/1666666 [57:37<1:32:39, 216.08it/s]

finished frames 2791800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 465426/1666666 [57:38<1:32:40, 216.02it/s]

finished frames 2792400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 465536/1666666 [57:38<1:32:39, 216.04it/s]

finished frames 2793000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 465624/1666666 [57:39<1:32:52, 215.54it/s]

finished frames 2793600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 465734/1666666 [57:39<1:32:52, 215.50it/s]

finished frames 2794200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 465844/1666666 [57:40<1:32:51, 215.54it/s]

finished frames 2794800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 465932/1666666 [57:40<1:32:57, 215.27it/s]

finished frames 2795400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 466040/1666666 [57:41<1:41:35, 196.97it/s]

finished frames 2796000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 466127/1666666 [57:41<1:36:27, 207.44it/s]

finished frames 2796600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 466236/1666666 [57:42<1:34:58, 210.64it/s]

finished frames 2797200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 466324/1666666 [57:42<1:34:15, 212.26it/s]

finished frames 2797800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 466434/1666666 [57:43<1:34:21, 212.00it/s]

finished frames 2798400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 466522/1666666 [57:43<1:34:32, 211.59it/s]

finished frames 2799000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 466632/1666666 [57:43<1:34:27, 211.73it/s]

finished frames 2799600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 466742/1666666 [57:44<1:34:15, 212.17it/s]

finished frames 2800200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 466827/1666666 [57:44<1:35:45, 208.83it/s]

finished frames 2800800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 466932/1666666 [57:45<1:36:24, 207.39it/s]

finished frames 2801400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 467037/1666666 [57:45<1:38:47, 202.37it/s]

finished frames 2802000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 467124/1666666 [57:46<1:35:02, 210.34it/s]

finished frames 2802600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 467234/1666666 [57:46<1:34:01, 212.62it/s]

finished frames 2803200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 467344/1666666 [57:47<1:33:38, 213.47it/s]

finished frames 2803800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 467432/1666666 [57:47<1:33:18, 214.19it/s]

finished frames 2804400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 467542/1666666 [57:48<1:33:34, 213.57it/s]

finished frames 2805000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 467630/1666666 [57:48<1:33:20, 214.09it/s]

finished frames 2805600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 467740/1666666 [57:49<1:32:57, 214.94it/s]

finished frames 2806200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 467828/1666666 [57:49<1:33:10, 214.45it/s]

finished frames 2806800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 467938/1666666 [57:50<1:33:07, 214.54it/s]

finished frames 2807400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 468026/1666666 [57:50<1:35:15, 209.70it/s]

finished frames 2808000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 468136/1666666 [57:51<1:33:21, 213.96it/s]

finished frames 2808600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 468224/1666666 [57:51<1:41:11, 197.40it/s]

finished frames 2809200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 468334/1666666 [57:52<1:34:20, 211.71it/s]

finished frames 2809800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 468444/1666666 [57:52<1:33:03, 214.60it/s]

finished frames 2810400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 468532/1666666 [57:52<1:32:36, 215.61it/s]

finished frames 2811000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 468642/1666666 [57:53<1:32:36, 215.62it/s]

finished frames 2811600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 468730/1666666 [57:53<1:32:38, 215.51it/s]

finished frames 2812200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 468840/1666666 [57:54<1:33:15, 214.08it/s]

finished frames 2812800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 468928/1666666 [57:54<1:32:17, 216.31it/s]

finished frames 2813400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 469037/1666666 [57:55<1:36:41, 206.42it/s]

finished frames 2814000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 469125/1666666 [57:55<1:34:28, 211.27it/s]

finished frames 2814600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 469236/1666666 [57:56<1:36:00, 207.85it/s]

finished frames 2815200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 469341/1666666 [57:56<1:37:47, 204.04it/s]

finished frames 2815800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 469426/1666666 [57:57<1:36:49, 206.08it/s]

finished frames 2816400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 469532/1666666 [57:57<1:35:48, 208.25it/s]

finished frames 2817000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 469637/1666666 [57:58<1:38:43, 202.09it/s]

finished frames 2817600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 469723/1666666 [57:58<1:36:31, 206.66it/s]

finished frames 2818200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 469832/1666666 [57:59<1:33:49, 212.62it/s]

finished frames 2818800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 469942/1666666 [57:59<1:33:26, 213.44it/s]

finished frames 2819400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 470029/1666666 [58:00<1:36:45, 206.14it/s]

finished frames 2820000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 470134/1666666 [58:00<1:35:58, 207.78it/s]

finished frames 2820600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 470240/1666666 [58:01<1:35:37, 208.54it/s]

finished frames 2821200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 470324/1666666 [58:01<1:36:01, 207.66it/s]

finished frames 2821800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 470429/1666666 [58:02<1:36:01, 207.61it/s]

finished frames 2822400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 470534/1666666 [58:02<1:35:46, 208.15it/s]

finished frames 2823000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 470639/1666666 [58:03<1:35:51, 207.96it/s]

finished frames 2823600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 470723/1666666 [58:03<1:35:52, 207.91it/s]

finished frames 2824200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 470828/1666666 [58:03<1:36:12, 207.17it/s]

finished frames 2824800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 470933/1666666 [58:04<1:35:57, 207.67it/s]

finished frames 2825400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 471038/1666666 [58:04<1:37:58, 203.40it/s]

finished frames 2826000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 471122/1666666 [58:05<1:36:58, 205.46it/s]

finished frames 2826600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 471227/1666666 [58:05<1:36:41, 206.07it/s]

finished frames 2827200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 471332/1666666 [58:06<1:36:22, 206.71it/s]

finished frames 2827800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 471437/1666666 [58:06<1:36:06, 207.27it/s]

finished frames 2828400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 471542/1666666 [58:07<1:36:13, 207.00it/s]

finished frames 2829000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 471626/1666666 [58:07<1:36:00, 207.44it/s]

finished frames 2829600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 471731/1666666 [58:08<1:36:07, 207.17it/s]

finished frames 2830200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 471836/1666666 [58:08<1:36:03, 207.29it/s]

finished frames 2830800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 471941/1666666 [58:09<1:35:28, 208.56it/s]

finished frames 2831400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 472025/1666666 [58:09<1:38:00, 203.14it/s]

finished frames 2832000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 472130/1666666 [58:10<1:36:15, 206.83it/s]

finished frames 2832600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 472237/1666666 [58:10<1:35:06, 209.32it/s]

finished frames 2833200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 472342/1666666 [58:11<1:35:08, 209.23it/s]

finished frames 2833800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 472428/1666666 [58:11<1:35:15, 208.96it/s]

finished frames 2834400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 472535/1666666 [58:12<1:34:56, 209.64it/s]

finished frames 2835000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 472641/1666666 [58:12<1:35:04, 209.33it/s]

finished frames 2835600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 472727/1666666 [58:13<1:34:54, 209.66it/s]

finished frames 2836200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 472832/1666666 [58:13<1:35:26, 208.46it/s]

finished frames 2836800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 472918/1666666 [58:14<1:45:05, 189.31it/s]

finished frames 2837400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 473026/1666666 [58:14<1:37:01, 205.05it/s]

finished frames 2838000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 473135/1666666 [58:15<1:35:09, 209.04it/s]

finished frames 2838600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 473223/1666666 [58:15<1:33:13, 213.34it/s]

finished frames 2839200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 473333/1666666 [58:16<1:32:36, 214.76it/s]

finished frames 2839800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 473443/1666666 [58:16<1:32:32, 214.88it/s]

finished frames 2840400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 473531/1666666 [58:16<1:32:21, 215.32it/s]

finished frames 2841000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 473641/1666666 [58:17<1:32:02, 216.02it/s]

finished frames 2841600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 473729/1666666 [58:17<1:32:18, 215.40it/s]

finished frames 2842200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 473839/1666666 [58:18<1:31:45, 216.66it/s]

finished frames 2842800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 473927/1666666 [58:18<1:31:50, 216.44it/s]

finished frames 2843400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 474037/1666666 [58:19<1:34:08, 211.12it/s]

finished frames 2844000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 474125/1666666 [58:19<1:32:44, 214.31it/s]

finished frames 2844600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 474235/1666666 [58:20<1:32:20, 215.24it/s]

finished frames 2845200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 474323/1666666 [58:20<1:32:38, 214.50it/s]

finished frames 2845800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 474433/1666666 [58:21<1:32:31, 214.75it/s]

finished frames 2846400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 474543/1666666 [58:21<1:32:26, 214.94it/s]

finished frames 2847000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 474631/1666666 [58:22<1:32:13, 215.43it/s]

finished frames 2847600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 474741/1666666 [58:22<1:32:01, 215.87it/s]

finished frames 2848200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 474829/1666666 [58:23<1:32:06, 215.66it/s]

finished frames 2848800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 28%|██▊       | 474939/1666666 [58:23<1:31:58, 215.97it/s]

finished frames 2849400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▊       | 475027/1666666 [58:23<1:34:05, 211.08it/s]

finished frames 2850000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▊       | 475137/1666666 [58:24<1:32:40, 214.30it/s]

finished frames 2850600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▊       | 475225/1666666 [58:24<1:32:26, 214.81it/s]

finished frames 2851200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▊       | 475335/1666666 [58:25<1:36:34, 205.60it/s]

finished frames 2851800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▊       | 475423/1666666 [58:25<1:33:22, 212.61it/s]

finished frames 2852400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▊       | 475533/1666666 [58:26<1:36:11, 206.38it/s]

finished frames 2853000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▊       | 475643/1666666 [58:26<1:32:49, 213.85it/s]

finished frames 2853600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▊       | 475731/1666666 [58:27<1:32:40, 214.19it/s]

finished frames 2854200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▊       | 475841/1666666 [58:27<1:32:35, 214.35it/s]

finished frames 2854800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▊       | 475929/1666666 [58:28<1:32:27, 214.65it/s]

finished frames 2855400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▊       | 476039/1666666 [58:28<1:34:00, 211.10it/s]

finished frames 2856000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▊       | 476127/1666666 [58:29<1:32:47, 213.82it/s]

finished frames 2856600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▊       | 476237/1666666 [58:29<1:32:24, 214.69it/s]

finished frames 2857200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▊       | 476325/1666666 [58:30<1:32:12, 215.15it/s]

finished frames 2857800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▊       | 476435/1666666 [58:30<1:31:58, 215.69it/s]

finished frames 2858400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▊       | 476523/1666666 [58:31<1:31:49, 216.01it/s]

finished frames 2859000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▊       | 476633/1666666 [58:31<1:31:32, 216.66it/s]

finished frames 2859600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▊       | 476743/1666666 [58:32<1:31:28, 216.79it/s]

finished frames 2860200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▊       | 476831/1666666 [58:32<1:31:34, 216.54it/s]

finished frames 2860800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▊       | 476941/1666666 [58:32<1:31:16, 217.26it/s]

finished frames 2861400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▊       | 477029/1666666 [58:33<1:33:46, 211.45it/s]

finished frames 2862000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▊       | 477139/1666666 [58:33<1:31:59, 215.52it/s]

finished frames 2862600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▊       | 477227/1666666 [58:34<1:31:41, 216.19it/s]

finished frames 2863200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▊       | 477337/1666666 [58:34<1:31:14, 217.26it/s]

finished frames 2863800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▊       | 477425/1666666 [58:35<1:31:39, 216.25it/s]

finished frames 2864400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▊       | 477535/1666666 [58:35<1:31:39, 216.23it/s]

finished frames 2865000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▊       | 477623/1666666 [58:36<1:31:39, 216.21it/s]

finished frames 2865600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▊       | 477733/1666666 [58:36<1:39:37, 198.92it/s]

finished frames 2866200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▊       | 477843/1666666 [58:37<1:33:01, 213.01it/s]

finished frames 2866800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▊       | 477931/1666666 [58:37<1:32:14, 214.79it/s]

finished frames 2867400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▊       | 478041/1666666 [58:38<1:34:11, 210.31it/s]

finished frames 2868000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▊       | 478129/1666666 [58:38<1:32:53, 213.26it/s]

finished frames 2868600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▊       | 478239/1666666 [58:39<1:32:31, 214.08it/s]

finished frames 2869200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▊       | 478327/1666666 [58:39<1:32:18, 214.57it/s]

finished frames 2869800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▊       | 478437/1666666 [58:39<1:32:15, 214.65it/s]

finished frames 2870400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▊       | 478525/1666666 [58:40<1:32:22, 214.37it/s]

finished frames 2871000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▊       | 478635/1666666 [58:40<1:32:17, 214.56it/s]

finished frames 2871600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▊       | 478723/1666666 [58:41<1:32:20, 214.43it/s]

finished frames 2872200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▊       | 478833/1666666 [58:41<1:32:22, 214.30it/s]

finished frames 2872800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▊       | 478943/1666666 [58:42<1:32:04, 214.99it/s]

finished frames 2873400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▊       | 479031/1666666 [58:42<1:34:28, 209.53it/s]

finished frames 2874000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▊       | 479140/1666666 [58:43<1:33:02, 212.73it/s]

finished frames 2874600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 479228/1666666 [58:43<1:32:21, 214.29it/s]

finished frames 2875200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 479338/1666666 [58:44<1:32:10, 214.67it/s]

finished frames 2875800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 479426/1666666 [58:44<1:31:55, 215.24it/s]

finished frames 2876400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 479536/1666666 [58:45<1:32:05, 214.86it/s]

finished frames 2877000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 479624/1666666 [58:45<1:32:13, 214.51it/s]

finished frames 2877600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 479734/1666666 [58:46<1:32:16, 214.37it/s]

finished frames 2878200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 479844/1666666 [58:46<1:32:09, 214.62it/s]

finished frames 2878800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 479932/1666666 [58:46<1:32:12, 214.52it/s]

finished frames 2879400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 480042/1666666 [58:47<1:34:27, 209.38it/s]

finished frames 2880000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 480129/1666666 [58:47<1:41:05, 195.62it/s]

finished frames 2880600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 480238/1666666 [58:48<1:33:32, 211.38it/s]

finished frames 2881200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 480326/1666666 [58:48<1:35:30, 207.02it/s]

finished frames 2881800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 480436/1666666 [58:49<1:32:53, 212.83it/s]

finished frames 2882400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 480524/1666666 [58:49<1:32:28, 213.76it/s]

finished frames 2883000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 480634/1666666 [58:50<1:32:21, 214.03it/s]

finished frames 2883600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 480744/1666666 [58:50<1:32:14, 214.27it/s]

finished frames 2884200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 480832/1666666 [58:51<1:32:15, 214.24it/s]

finished frames 2884800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 480942/1666666 [58:51<1:32:10, 214.40it/s]

finished frames 2885400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 481030/1666666 [58:52<1:34:35, 208.92it/s]

finished frames 2886000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 481140/1666666 [58:52<1:32:46, 212.98it/s]

finished frames 2886600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 481228/1666666 [58:53<1:32:37, 213.31it/s]

finished frames 2887200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 481338/1666666 [58:53<1:32:13, 214.21it/s]

finished frames 2887800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 481426/1666666 [58:54<1:31:57, 214.82it/s]

finished frames 2888400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 481536/1666666 [58:54<1:32:04, 214.53it/s]

finished frames 2889000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 481624/1666666 [58:54<1:32:07, 214.39it/s]

finished frames 2889600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 481734/1666666 [58:55<1:33:00, 212.33it/s]

finished frames 2890200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 481843/1666666 [58:56<1:33:27, 211.29it/s]

finished frames 2890800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 481930/1666666 [58:56<1:33:41, 210.75it/s]

finished frames 2891400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 482038/1666666 [58:56<1:35:31, 206.70it/s]

finished frames 2892000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 482125/1666666 [58:57<1:32:58, 212.36it/s]

finished frames 2892600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 482235/1666666 [58:57<1:32:18, 213.86it/s]

finished frames 2893200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 482323/1666666 [58:58<1:32:25, 213.58it/s]

finished frames 2893800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 482433/1666666 [58:58<1:32:12, 214.04it/s]

finished frames 2894400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 482521/1666666 [58:59<1:40:21, 196.66it/s]

finished frames 2895000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 482629/1666666 [58:59<1:42:20, 192.83it/s]

finished frames 2895600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 482738/1666666 [59:00<1:33:50, 210.28it/s]

finished frames 2896200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 482826/1666666 [59:00<1:32:43, 212.78it/s]

finished frames 2896800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 482936/1666666 [59:01<1:32:30, 213.28it/s]

finished frames 2897400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 483024/1666666 [59:01<1:34:22, 209.03it/s]

finished frames 2898000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 483134/1666666 [59:02<1:32:48, 212.54it/s]

finished frames 2898600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 483222/1666666 [59:02<1:32:49, 212.47it/s]

finished frames 2899200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 483332/1666666 [59:03<1:32:18, 213.66it/s]

finished frames 2899800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 483442/1666666 [59:03<1:32:13, 213.83it/s]

finished frames 2900400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 483530/1666666 [59:04<1:32:21, 213.49it/s]

finished frames 2901000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 483640/1666666 [59:04<1:32:14, 213.75it/s]

finished frames 2901600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 483728/1666666 [59:04<1:32:12, 213.82it/s]

finished frames 2902200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 483838/1666666 [59:05<1:32:13, 213.74it/s]

finished frames 2902800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 483926/1666666 [59:05<1:32:17, 213.58it/s]

finished frames 2903400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 484036/1666666 [59:06<1:34:23, 208.80it/s]

finished frames 2904000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 484123/1666666 [59:06<1:32:45, 212.47it/s]

finished frames 2904600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 484233/1666666 [59:07<1:32:09, 213.84it/s]

finished frames 2905200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 484343/1666666 [59:07<1:33:05, 211.67it/s]

finished frames 2905800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 484431/1666666 [59:08<1:32:42, 212.53it/s]

finished frames 2906400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 484541/1666666 [59:08<1:32:17, 213.49it/s]

finished frames 2907000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 484629/1666666 [59:09<1:33:15, 211.26it/s]

finished frames 2907600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 484738/1666666 [59:09<1:33:25, 210.86it/s]

finished frames 2908200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 484825/1666666 [59:10<1:33:29, 210.69it/s]

finished frames 2908800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 484933/1666666 [59:10<1:37:56, 201.09it/s]

finished frames 2909400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 485039/1666666 [59:11<1:37:01, 202.98it/s]

finished frames 2910000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 485124/1666666 [59:11<1:34:46, 207.79it/s]

finished frames 2910600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 485232/1666666 [59:12<1:33:51, 209.80it/s]

finished frames 2911200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 485340/1666666 [59:12<1:33:29, 210.60it/s]

finished frames 2911800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 485427/1666666 [59:13<1:33:39, 210.19it/s]

finished frames 2912400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 485536/1666666 [59:13<1:33:02, 211.59it/s]

finished frames 2913000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 485624/1666666 [59:14<1:32:07, 213.66it/s]

finished frames 2913600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 485734/1666666 [59:14<1:31:54, 214.15it/s]

finished frames 2914200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 485844/1666666 [59:15<1:31:51, 214.26it/s]

finished frames 2914800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 485932/1666666 [59:15<1:31:48, 214.34it/s]

finished frames 2915400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 486020/1666666 [59:15<1:34:55, 207.29it/s]

finished frames 2916000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 486129/1666666 [59:16<1:33:52, 209.58it/s]

finished frames 2916600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 486239/1666666 [59:16<1:31:53, 214.10it/s]

finished frames 2917200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 486327/1666666 [59:17<1:31:46, 214.36it/s]

finished frames 2917800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 486437/1666666 [59:17<1:31:40, 214.57it/s]

finished frames 2918400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 486525/1666666 [59:18<1:31:42, 214.45it/s]

finished frames 2919000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 486635/1666666 [59:18<1:31:38, 214.60it/s]

finished frames 2919600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 486723/1666666 [59:19<1:31:29, 214.96it/s]

finished frames 2920200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 486833/1666666 [59:19<1:31:47, 214.24it/s]

finished frames 2920800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 486943/1666666 [59:20<1:31:31, 214.82it/s]

finished frames 2921400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 487031/1666666 [59:20<1:33:55, 209.33it/s]

finished frames 2922000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 487141/1666666 [59:21<1:31:43, 214.31it/s]

finished frames 2922600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 487229/1666666 [59:21<1:31:27, 214.95it/s]

finished frames 2923200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 487339/1666666 [59:22<1:34:31, 207.93it/s]

finished frames 2923800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 487426/1666666 [59:22<1:37:54, 200.73it/s]

finished frames 2924400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 487535/1666666 [59:23<1:33:02, 211.22it/s]

finished frames 2925000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 487623/1666666 [59:23<1:32:13, 213.06it/s]

finished frames 2925600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 487733/1666666 [59:23<1:31:54, 213.78it/s]

finished frames 2926200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 487843/1666666 [59:24<1:31:50, 213.92it/s]

finished frames 2926800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 487931/1666666 [59:24<1:31:49, 213.95it/s]

finished frames 2927400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 488041/1666666 [59:25<1:34:38, 207.54it/s]

finished frames 2928000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 488128/1666666 [59:25<1:33:31, 210.02it/s]

finished frames 2928600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 488237/1666666 [59:26<1:32:55, 211.35it/s]

finished frames 2929200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 488325/1666666 [59:26<1:32:40, 211.92it/s]

finished frames 2929800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 488435/1666666 [59:27<1:32:49, 211.57it/s]

finished frames 2930400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 488523/1666666 [59:27<1:32:44, 211.72it/s]

finished frames 2931000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 488633/1666666 [59:28<1:32:49, 211.51it/s]

finished frames 2931600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 488742/1666666 [59:28<1:33:44, 209.42it/s]

finished frames 2932200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 488826/1666666 [59:29<1:34:08, 208.51it/s]

finished frames 2932800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 488931/1666666 [59:29<1:34:05, 208.62it/s]

finished frames 2933400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 489036/1666666 [59:30<1:36:11, 204.04it/s]

finished frames 2934000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 489142/1666666 [59:30<1:34:16, 208.17it/s]

finished frames 2934600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 489227/1666666 [59:31<1:33:58, 208.81it/s]

finished frames 2935200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 489333/1666666 [59:31<1:33:44, 209.33it/s]

finished frames 2935800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 489438/1666666 [59:32<1:33:54, 208.95it/s]

finished frames 2936400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 489524/1666666 [59:32<1:33:35, 209.63it/s]

finished frames 2937000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 489629/1666666 [59:33<1:50:30, 177.51it/s]

finished frames 2937600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 489736/1666666 [59:33<1:36:26, 203.39it/s]

finished frames 2938200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 489842/1666666 [59:34<1:34:11, 208.24it/s]

finished frames 2938800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 489927/1666666 [59:34<1:34:11, 208.24it/s]

finished frames 2939400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 490032/1666666 [59:35<1:36:37, 202.94it/s]

finished frames 2940000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 490137/1666666 [59:35<1:34:45, 206.92it/s]

finished frames 2940600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 490242/1666666 [59:36<1:34:43, 207.01it/s]

finished frames 2941200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 490326/1666666 [59:36<1:34:46, 206.87it/s]

finished frames 2941800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 490431/1666666 [59:36<1:34:27, 207.54it/s]

finished frames 2942400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 490536/1666666 [59:37<1:34:06, 208.29it/s]

finished frames 2943000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 490641/1666666 [59:37<1:34:13, 208.03it/s]

finished frames 2943600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 490725/1666666 [59:38<1:34:24, 207.60it/s]

finished frames 2944200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 490830/1666666 [59:38<1:34:23, 207.62it/s]

finished frames 2944800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 490935/1666666 [59:39<1:34:20, 207.70it/s]

finished frames 2945400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 491040/1666666 [59:39<1:36:43, 202.56it/s]

finished frames 2946000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 491124/1666666 [59:40<1:35:19, 205.51it/s]

finished frames 2946600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 491229/1666666 [59:40<1:34:18, 207.74it/s]

finished frames 2947200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 491336/1666666 [59:41<1:33:41, 209.08it/s]

finished frames 2947800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 491423/1666666 [59:41<1:32:29, 211.79it/s]

finished frames 2948400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 491533/1666666 [59:42<1:32:02, 212.81it/s]

finished frames 2949000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 29%|██▉       | 491643/1666666 [59:42<1:31:42, 213.53it/s]

finished frames 2949600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 491731/1666666 [59:43<1:31:43, 213.49it/s]

finished frames 2950200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 491841/1666666 [59:43<1:31:54, 213.04it/s]

finished frames 2950800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 491929/1666666 [59:44<1:31:32, 213.87it/s]

finished frames 2951400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 492038/1666666 [59:44<1:36:19, 203.23it/s]

finished frames 2952000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 492124/1666666 [59:45<1:37:15, 201.26it/s]

finished frames 2952600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 492233/1666666 [59:45<1:32:33, 211.46it/s]

finished frames 2953200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 492343/1666666 [59:46<1:31:36, 213.63it/s]

finished frames 2953800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 492431/1666666 [59:46<1:31:51, 213.06it/s]

finished frames 2954400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 492541/1666666 [59:47<1:33:51, 208.49it/s]

finished frames 2955000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 492628/1666666 [59:47<1:32:35, 211.31it/s]

finished frames 2955600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 492738/1666666 [59:48<1:31:21, 214.16it/s]

finished frames 2956200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 492826/1666666 [59:48<1:31:07, 214.71it/s]

finished frames 2956800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 492936/1666666 [59:48<1:30:52, 215.27it/s]

finished frames 2957400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 493024/1666666 [59:49<1:32:52, 210.59it/s]

finished frames 2958000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 493134/1666666 [59:49<1:31:30, 213.74it/s]

finished frames 2958600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 493244/1666666 [59:50<1:31:06, 214.67it/s]

finished frames 2959200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 493332/1666666 [59:50<1:31:08, 214.57it/s]

finished frames 2959800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 493442/1666666 [59:51<1:30:55, 215.04it/s]

finished frames 2960400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 493530/1666666 [59:51<1:30:57, 214.95it/s]

finished frames 2961000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 493640/1666666 [59:52<1:30:58, 214.90it/s]

finished frames 2961600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 493728/1666666 [59:52<1:31:02, 214.74it/s]

finished frames 2962200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 493838/1666666 [59:53<1:30:59, 214.83it/s]

finished frames 2962800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 493926/1666666 [59:53<1:31:23, 213.85it/s]

finished frames 2963400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 494036/1666666 [59:54<1:31:52, 212.72it/s]

finished frames 2964000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 494124/1666666 [59:54<1:31:17, 214.08it/s]

finished frames 2964600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 494235/1666666 [59:55<1:32:00, 212.39it/s]

finished frames 2965200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 494323/1666666 [59:55<1:32:30, 211.20it/s]

finished frames 2965800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 494431/1666666 [59:55<1:35:28, 204.64it/s]

finished frames 2966400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 494537/1666666 [59:56<1:38:10, 199.00it/s]

finished frames 2967000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 494641/1666666 [59:57<1:37:05, 201.18it/s]

finished frames 2967600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 494725/1666666 [59:57<1:35:18, 204.95it/s]

finished frames 2968200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 494830/1666666 [59:57<1:34:06, 207.54it/s]

finished frames 2968800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 494935/1666666 [59:58<1:35:52, 203.69it/s]

finished frames 2969400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 495040/1666666 [59:58<1:35:52, 203.66it/s]

finished frames 2970000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 495124/1666666 [59:59<1:34:16, 207.12it/s]

finished frames 2970600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 495229/1666666 [59:59<1:34:14, 207.16it/s]

finished frames 2971200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 495335/1666666 [1:00:00<1:33:34, 208.63it/s]

finished frames 2971800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 495441/1666666 [1:00:00<1:33:17, 209.24it/s]

finished frames 2972400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 495526/1666666 [1:00:01<1:33:24, 208.98it/s]

finished frames 2973000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 495631/1666666 [1:00:01<1:33:32, 208.65it/s]

finished frames 2973600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 495736/1666666 [1:00:02<1:33:33, 208.61it/s]

finished frames 2974200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 495823/1666666 [1:00:02<1:32:07, 211.81it/s]

finished frames 2974800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 495933/1666666 [1:00:03<1:31:41, 212.79it/s]

finished frames 2975400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 496021/1666666 [1:00:03<1:34:25, 206.62it/s]

finished frames 2976000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 496131/1666666 [1:00:04<1:31:44, 212.65it/s]

finished frames 2976600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 496241/1666666 [1:00:04<1:31:19, 213.59it/s]

finished frames 2977200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 496329/1666666 [1:00:05<1:31:18, 213.61it/s]

finished frames 2977800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 496439/1666666 [1:00:05<1:31:13, 213.79it/s]

finished frames 2978400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 496527/1666666 [1:00:06<1:31:06, 214.06it/s]

finished frames 2979000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 496637/1666666 [1:00:06<1:31:10, 213.88it/s]

finished frames 2979600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 496725/1666666 [1:00:07<1:42:14, 190.73it/s]

finished frames 2980200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 496833/1666666 [1:00:07<1:44:32, 186.50it/s]

finished frames 2980800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 496942/1666666 [1:00:08<1:33:22, 208.80it/s]

finished frames 2981400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 497030/1666666 [1:00:08<1:33:47, 207.84it/s]

finished frames 2982000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 497139/1666666 [1:00:09<1:31:31, 212.99it/s]

finished frames 2982600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 497227/1666666 [1:00:09<1:30:59, 214.19it/s]

finished frames 2983200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 497337/1666666 [1:00:09<1:31:16, 213.51it/s]

finished frames 2983800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 497425/1666666 [1:00:10<1:31:35, 212.75it/s]

finished frames 2984400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 497535/1666666 [1:00:10<1:30:59, 214.16it/s]

finished frames 2985000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 497623/1666666 [1:00:11<1:31:08, 213.79it/s]

finished frames 2985600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 497733/1666666 [1:00:11<1:31:01, 214.05it/s]

finished frames 2986200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 497843/1666666 [1:00:12<1:31:09, 213.71it/s]

finished frames 2986800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 497931/1666666 [1:00:12<1:31:11, 213.62it/s]

finished frames 2987400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 498019/1666666 [1:00:13<1:35:06, 204.79it/s]

finished frames 2988000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 498125/1666666 [1:00:13<1:33:51, 207.50it/s]

finished frames 2988600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 498231/1666666 [1:00:14<1:33:23, 208.53it/s]

finished frames 2989200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 498336/1666666 [1:00:14<1:33:19, 208.66it/s]

finished frames 2989800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 498442/1666666 [1:00:15<1:33:08, 209.05it/s]

finished frames 2990400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 498527/1666666 [1:00:15<1:33:20, 208.57it/s]

finished frames 2991000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 498633/1666666 [1:00:16<1:33:08, 209.02it/s]

finished frames 2991600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 498739/1666666 [1:00:16<1:32:54, 209.49it/s]

finished frames 2992200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 498823/1666666 [1:00:17<1:33:27, 208.26it/s]

finished frames 2992800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 498929/1666666 [1:00:17<1:33:14, 208.75it/s]

finished frames 2993400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 499035/1666666 [1:00:18<1:35:23, 204.00it/s]

finished frames 2994000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 499140/1666666 [1:00:18<1:36:00, 202.69it/s]

finished frames 2994600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 499225/1666666 [1:00:19<1:36:45, 201.08it/s]

finished frames 2995200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 499331/1666666 [1:00:19<1:33:47, 207.43it/s]

finished frames 2995800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 499437/1666666 [1:00:20<1:33:20, 208.40it/s]

finished frames 2996400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 499543/1666666 [1:00:20<1:33:02, 209.08it/s]

finished frames 2997000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 499628/1666666 [1:00:21<1:33:03, 209.01it/s]

finished frames 2997600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 499735/1666666 [1:00:21<1:32:59, 209.13it/s]

finished frames 2998200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 499843/1666666 [1:00:22<1:32:40, 209.84it/s]

finished frames 2998800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|██▉       | 499929/1666666 [1:00:22<1:32:34, 210.05it/s]

finished frames 2999400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 500035/1666666 [1:00:22<1:34:59, 204.71it/s]

finished frames 3000000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 500142/1666666 [1:00:23<1:33:09, 208.71it/s]

finished frames 3000600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 500227/1666666 [1:00:23<1:32:52, 209.30it/s]

finished frames 3001200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 500333/1666666 [1:00:24<1:33:01, 208.95it/s]

finished frames 3001800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 500438/1666666 [1:00:24<1:32:57, 209.09it/s]

finished frames 3002400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 500524/1666666 [1:00:25<1:32:43, 209.59it/s]

finished frames 3003000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 500631/1666666 [1:00:25<1:32:43, 209.60it/s]

finished frames 3003600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 500738/1666666 [1:00:26<1:32:45, 209.47it/s]

finished frames 3004200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 500824/1666666 [1:00:26<1:32:40, 209.65it/s]

finished frames 3004800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 500929/1666666 [1:00:27<1:33:01, 208.86it/s]

finished frames 3005400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 501034/1666666 [1:00:27<1:35:11, 204.09it/s]

finished frames 3006000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 501140/1666666 [1:00:28<1:33:02, 208.77it/s]

finished frames 3006600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 501224/1666666 [1:00:28<1:33:13, 208.36it/s]

finished frames 3007200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 501331/1666666 [1:00:29<1:32:50, 209.19it/s]

finished frames 3007800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 501438/1666666 [1:00:29<1:36:26, 201.36it/s]

finished frames 3008400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 501522/1666666 [1:00:30<1:39:54, 194.35it/s]

finished frames 3009000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 501630/1666666 [1:00:30<1:33:33, 207.54it/s]

finished frames 3009600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 501738/1666666 [1:00:31<1:32:30, 209.87it/s]

finished frames 3010200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 501822/1666666 [1:00:31<1:32:51, 209.08it/s]

finished frames 3010800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 501929/1666666 [1:00:32<1:32:47, 209.22it/s]

finished frames 3011400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 502034/1666666 [1:00:32<1:34:54, 204.51it/s]

finished frames 3012000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 502140/1666666 [1:00:33<1:33:06, 208.45it/s]

finished frames 3012600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 502226/1666666 [1:00:33<1:32:41, 209.39it/s]

finished frames 3013200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 502333/1666666 [1:00:34<1:32:37, 209.51it/s]

finished frames 3013800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 502439/1666666 [1:00:34<1:32:53, 208.87it/s]

finished frames 3014400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 502525/1666666 [1:00:34<1:32:28, 209.81it/s]

finished frames 3015000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 502632/1666666 [1:00:35<1:32:26, 209.88it/s]

finished frames 3015600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 502739/1666666 [1:00:36<1:32:39, 209.34it/s]

finished frames 3016200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 502825/1666666 [1:00:36<1:32:32, 209.61it/s]

finished frames 3016800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 502932/1666666 [1:00:36<1:32:23, 209.93it/s]

finished frames 3017400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 503038/1666666 [1:00:37<1:34:39, 204.90it/s]

finished frames 3018000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 503122/1666666 [1:00:37<1:33:37, 207.12it/s]

finished frames 3018600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 503227/1666666 [1:00:38<1:33:07, 208.21it/s]

finished frames 3019200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 503333/1666666 [1:00:38<1:32:54, 208.67it/s]

finished frames 3019800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 503440/1666666 [1:00:39<1:32:27, 209.67it/s]

finished frames 3020400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 503525/1666666 [1:00:39<1:32:36, 209.34it/s]

finished frames 3021000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 503631/1666666 [1:00:40<1:32:38, 209.22it/s]

finished frames 3021600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 503738/1666666 [1:00:40<1:32:26, 209.66it/s]

finished frames 3022200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 503822/1666666 [1:00:41<1:32:42, 209.05it/s]

finished frames 3022800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 503929/1666666 [1:00:41<1:32:41, 209.08it/s]

finished frames 3023400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 504036/1666666 [1:00:42<1:34:28, 205.09it/s]

finished frames 3024000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 504141/1666666 [1:00:42<1:32:58, 208.39it/s]

finished frames 3024600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 504226/1666666 [1:00:43<1:32:47, 208.79it/s]

finished frames 3025200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 504332/1666666 [1:00:43<1:32:48, 208.72it/s]

finished frames 3025800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 504438/1666666 [1:00:44<1:32:40, 209.03it/s]

finished frames 3026400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 504523/1666666 [1:00:44<1:32:40, 209.02it/s]

finished frames 3027000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 504631/1666666 [1:00:45<1:31:40, 211.26it/s]

finished frames 3027600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 504741/1666666 [1:00:45<1:31:03, 212.66it/s]

finished frames 3028200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 504829/1666666 [1:00:46<1:32:37, 209.04it/s]

finished frames 3028800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 504938/1666666 [1:00:46<1:31:08, 212.44it/s]

finished frames 3029400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 505026/1666666 [1:00:46<1:32:33, 209.18it/s]

finished frames 3030000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 505135/1666666 [1:00:47<1:30:37, 213.61it/s]

finished frames 3030600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 505223/1666666 [1:00:47<1:30:42, 213.41it/s]

finished frames 3031200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 505333/1666666 [1:00:48<1:30:20, 214.23it/s]

finished frames 3031800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 505443/1666666 [1:00:48<1:30:09, 214.66it/s]

finished frames 3032400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 505531/1666666 [1:00:49<1:30:21, 214.17it/s]

finished frames 3033000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 505641/1666666 [1:00:49<1:30:15, 214.40it/s]

finished frames 3033600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 505729/1666666 [1:00:50<1:30:17, 214.29it/s]

finished frames 3034200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 505839/1666666 [1:00:50<1:29:52, 215.26it/s]

finished frames 3034800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 505927/1666666 [1:00:51<1:29:55, 215.12it/s]

finished frames 3035400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 506037/1666666 [1:00:51<1:32:21, 209.46it/s]

finished frames 3036000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 506124/1666666 [1:00:52<1:41:38, 190.30it/s]

finished frames 3036600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 506232/1666666 [1:00:52<1:37:45, 197.86it/s]

finished frames 3037200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 506342/1666666 [1:00:53<1:30:57, 212.63it/s]

finished frames 3037800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 506430/1666666 [1:00:53<1:30:07, 214.55it/s]

finished frames 3038400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 506540/1666666 [1:00:54<1:29:51, 215.16it/s]

finished frames 3039000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 506628/1666666 [1:00:54<1:29:29, 216.03it/s]

finished frames 3039600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 506738/1666666 [1:00:55<1:29:26, 216.14it/s]

finished frames 3040200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 506826/1666666 [1:00:55<1:30:25, 213.79it/s]

finished frames 3040800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 506934/1666666 [1:00:55<1:32:30, 208.95it/s]

finished frames 3041400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 507040/1666666 [1:00:56<1:34:27, 204.60it/s]

finished frames 3042000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 507126/1666666 [1:00:56<1:32:45, 208.33it/s]

finished frames 3042600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 507233/1666666 [1:00:57<1:32:02, 209.94it/s]

finished frames 3043200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 507341/1666666 [1:00:57<1:31:52, 210.29it/s]

finished frames 3043800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 507429/1666666 [1:00:58<1:31:55, 210.19it/s]

finished frames 3044400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 507537/1666666 [1:00:58<1:32:14, 209.45it/s]

finished frames 3045000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 507624/1666666 [1:00:59<1:32:09, 209.62it/s]

finished frames 3045600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 507733/1666666 [1:00:59<1:32:02, 209.84it/s]

finished frames 3046200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 507841/1666666 [1:01:00<1:31:58, 209.98it/s]

finished frames 3046800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 507927/1666666 [1:01:00<1:32:02, 209.82it/s]

finished frames 3047400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 508033/1666666 [1:01:01<1:34:14, 204.92it/s]

finished frames 3048000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 508140/1666666 [1:01:01<1:32:18, 209.17it/s]

finished frames 3048600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 508226/1666666 [1:01:02<1:32:04, 209.70it/s]

finished frames 3049200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 30%|███       | 508333/1666666 [1:01:02<1:32:09, 209.47it/s]

finished frames 3049800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 508440/1666666 [1:01:03<1:31:52, 210.10it/s]

finished frames 3050400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 508527/1666666 [1:01:03<1:34:38, 203.94it/s]

finished frames 3051000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 508633/1666666 [1:01:04<1:35:27, 202.18it/s]

finished frames 3051600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 508738/1666666 [1:01:04<1:33:14, 206.98it/s]

finished frames 3052200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 508843/1666666 [1:01:05<1:32:24, 208.82it/s]

finished frames 3052800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 508928/1666666 [1:01:05<1:32:19, 209.02it/s]

finished frames 3053400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 509033/1666666 [1:01:06<1:34:41, 203.76it/s]

finished frames 3054000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 509138/1666666 [1:01:06<1:33:12, 206.98it/s]

finished frames 3054600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 509223/1666666 [1:01:07<1:32:40, 208.16it/s]

finished frames 3055200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 509328/1666666 [1:01:07<1:32:35, 208.31it/s]

finished frames 3055800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 509434/1666666 [1:01:08<1:32:24, 208.71it/s]

finished frames 3056400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 509539/1666666 [1:01:08<1:32:30, 208.48it/s]

finished frames 3057000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 509623/1666666 [1:01:08<1:32:41, 208.05it/s]

finished frames 3057600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 509728/1666666 [1:01:09<1:32:24, 208.67it/s]

finished frames 3058200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 509833/1666666 [1:01:09<1:32:24, 208.66it/s]

finished frames 3058800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 509938/1666666 [1:01:10<1:32:26, 208.54it/s]

finished frames 3059400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 510022/1666666 [1:01:10<1:34:47, 203.35it/s]

finished frames 3060000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 510127/1666666 [1:01:11<1:33:28, 206.22it/s]

finished frames 3060600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 510232/1666666 [1:01:11<1:33:09, 206.89it/s]

finished frames 3061200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 510337/1666666 [1:01:12<1:33:07, 206.94it/s]

finished frames 3061800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 510442/1666666 [1:01:12<1:32:49, 207.62it/s]

finished frames 3062400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 510526/1666666 [1:01:13<1:32:47, 207.65it/s]

finished frames 3063000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 510631/1666666 [1:01:13<1:32:48, 207.59it/s]

finished frames 3063600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 510736/1666666 [1:01:14<1:32:59, 207.17it/s]

finished frames 3064200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 510841/1666666 [1:01:14<1:38:20, 195.90it/s]

finished frames 3064800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 510925/1666666 [1:01:15<1:34:17, 204.27it/s]

finished frames 3065400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 511030/1666666 [1:01:15<1:35:27, 201.76it/s]

finished frames 3066000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 511135/1666666 [1:01:16<1:33:07, 206.80it/s]

finished frames 3066600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 511240/1666666 [1:01:16<1:32:40, 207.80it/s]

finished frames 3067200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 511324/1666666 [1:01:17<1:32:36, 207.91it/s]

finished frames 3067800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 511430/1666666 [1:01:17<1:32:22, 208.45it/s]

finished frames 3068400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 511536/1666666 [1:01:18<1:32:19, 208.51it/s]

finished frames 3069000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 511641/1666666 [1:01:18<1:32:14, 208.70it/s]

finished frames 3069600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 511726/1666666 [1:01:19<1:32:18, 208.52it/s]

finished frames 3070200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 511831/1666666 [1:01:19<1:32:23, 208.32it/s]

finished frames 3070800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 511936/1666666 [1:01:20<1:32:30, 208.04it/s]

finished frames 3071400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 512020/1666666 [1:01:20<1:35:36, 201.27it/s]

finished frames 3072000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 512125/1666666 [1:01:21<1:33:04, 206.74it/s]

finished frames 3072600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 512230/1666666 [1:01:21<1:32:23, 208.25it/s]

finished frames 3073200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 512336/1666666 [1:01:22<1:32:03, 208.99it/s]

finished frames 3073800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 512423/1666666 [1:01:22<1:31:39, 209.87it/s]

finished frames 3074400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 512530/1666666 [1:01:23<1:31:33, 210.08it/s]

finished frames 3075000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 512637/1666666 [1:01:23<1:31:48, 209.49it/s]

finished frames 3075600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 512743/1666666 [1:01:24<1:31:39, 209.84it/s]

finished frames 3076200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 512829/1666666 [1:01:24<1:32:32, 207.81it/s]

finished frames 3076800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 512937/1666666 [1:01:25<1:31:36, 209.90it/s]

finished frames 3077400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 513022/1666666 [1:01:25<1:33:39, 205.28it/s]

finished frames 3078000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 513128/1666666 [1:01:25<1:37:45, 196.67it/s]

finished frames 3078600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 513238/1666666 [1:01:26<1:31:21, 210.41it/s]

finished frames 3079200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 513326/1666666 [1:01:26<1:30:35, 212.19it/s]

finished frames 3079800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 513436/1666666 [1:01:27<1:30:20, 212.75it/s]

finished frames 3080400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 513524/1666666 [1:01:27<1:30:21, 212.71it/s]

finished frames 3081000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 513634/1666666 [1:01:28<1:30:18, 212.81it/s]

finished frames 3081600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 513722/1666666 [1:01:28<1:30:20, 212.69it/s]

finished frames 3082200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 513832/1666666 [1:01:29<1:29:35, 214.46it/s]

finished frames 3082800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 513942/1666666 [1:01:29<1:29:20, 215.06it/s]

finished frames 3083400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 514030/1666666 [1:01:30<1:31:14, 210.54it/s]

finished frames 3084000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 514140/1666666 [1:01:30<1:29:38, 214.30it/s]

finished frames 3084600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 514228/1666666 [1:01:31<1:29:33, 214.48it/s]

finished frames 3085200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 514338/1666666 [1:01:31<1:29:27, 214.70it/s]

finished frames 3085800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 514426/1666666 [1:01:32<1:29:25, 214.75it/s]

finished frames 3086400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 514536/1666666 [1:01:32<1:29:15, 215.12it/s]

finished frames 3087000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 514624/1666666 [1:01:32<1:29:22, 214.83it/s]

finished frames 3087600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 514734/1666666 [1:01:33<1:29:17, 215.02it/s]

finished frames 3088200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 514844/1666666 [1:01:34<1:29:28, 214.54it/s]

finished frames 3088800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 514932/1666666 [1:01:34<1:29:19, 214.89it/s]

finished frames 3089400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 515020/1666666 [1:01:34<1:33:07, 206.12it/s]

finished frames 3090000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 515129/1666666 [1:01:35<1:30:26, 212.22it/s]

finished frames 3090600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 515239/1666666 [1:01:35<1:30:05, 213.02it/s]

finished frames 3091200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 515327/1666666 [1:01:36<1:30:01, 213.15it/s]

finished frames 3091800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 515437/1666666 [1:01:36<1:30:46, 211.37it/s]

finished frames 3092400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 515525/1666666 [1:01:37<1:30:34, 211.82it/s]

finished frames 3093000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 515635/1666666 [1:01:37<1:37:07, 197.53it/s]

finished frames 3093600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 515723/1666666 [1:01:38<1:31:31, 209.60it/s]

finished frames 3094200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 515833/1666666 [1:01:38<1:29:51, 213.44it/s]

finished frames 3094800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 515943/1666666 [1:01:39<1:29:21, 214.64it/s]

finished frames 3095400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 516031/1666666 [1:01:39<1:31:32, 209.50it/s]

finished frames 3096000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 516140/1666666 [1:01:40<1:29:59, 213.07it/s]

finished frames 3096600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 516228/1666666 [1:01:40<1:30:00, 213.03it/s]

finished frames 3097200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 516338/1666666 [1:01:41<1:29:44, 213.63it/s]

finished frames 3097800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 516426/1666666 [1:01:41<1:29:31, 214.14it/s]

finished frames 3098400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 516536/1666666 [1:01:42<1:29:08, 215.04it/s]

finished frames 3099000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 516624/1666666 [1:01:42<1:28:32, 216.46it/s]

finished frames 3099600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 516734/1666666 [1:01:42<1:28:32, 216.46it/s]

finished frames 3100200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 516844/1666666 [1:01:43<1:28:39, 216.17it/s]

finished frames 3100800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 516932/1666666 [1:01:43<1:28:27, 216.62it/s]

finished frames 3101400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 517042/1666666 [1:01:44<1:30:27, 211.82it/s]

finished frames 3102000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 517130/1666666 [1:01:44<1:28:48, 215.73it/s]

finished frames 3102600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 517240/1666666 [1:01:45<1:28:27, 216.55it/s]

finished frames 3103200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 517328/1666666 [1:01:45<1:28:15, 217.04it/s]

finished frames 3103800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 517438/1666666 [1:01:46<1:28:30, 216.42it/s]

finished frames 3104400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 517526/1666666 [1:01:46<1:28:09, 217.25it/s]

finished frames 3105000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 517636/1666666 [1:01:47<1:28:41, 215.91it/s]

finished frames 3105600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 517724/1666666 [1:01:47<1:28:08, 217.24it/s]

finished frames 3106200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 517834/1666666 [1:01:48<1:28:07, 217.26it/s]

finished frames 3106800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 517922/1666666 [1:01:48<1:36:14, 198.93it/s]

finished frames 3107400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 518031/1666666 [1:01:48<1:33:20, 205.08it/s]

finished frames 3108000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 518138/1666666 [1:01:49<1:31:40, 208.79it/s]

finished frames 3108600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 518243/1666666 [1:01:50<1:31:28, 209.23it/s]

finished frames 3109200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 518327/1666666 [1:01:50<1:31:57, 208.14it/s]

finished frames 3109800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 518434/1666666 [1:01:50<1:31:31, 209.10it/s]

finished frames 3110400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 518540/1666666 [1:01:51<1:31:25, 209.32it/s]

finished frames 3111000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 518625/1666666 [1:01:51<1:31:35, 208.92it/s]

finished frames 3111600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 518732/1666666 [1:01:52<1:31:26, 209.22it/s]

finished frames 3112200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 518838/1666666 [1:01:52<1:31:21, 209.38it/s]

finished frames 3112800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 518923/1666666 [1:01:53<1:31:29, 209.08it/s]

finished frames 3113400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 519029/1666666 [1:01:53<1:35:22, 200.53it/s]

finished frames 3114000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 519139/1666666 [1:01:54<1:30:51, 210.51it/s]

finished frames 3114600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 519227/1666666 [1:01:54<1:30:01, 212.42it/s]

finished frames 3115200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 519336/1666666 [1:01:55<1:32:04, 207.67it/s]

finished frames 3115800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 519443/1666666 [1:01:55<1:31:04, 209.94it/s]

finished frames 3116400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 519530/1666666 [1:01:56<1:30:34, 211.10it/s]

finished frames 3117000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 519637/1666666 [1:01:56<1:33:44, 203.93it/s]

finished frames 3117600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 519721/1666666 [1:01:57<1:34:39, 201.95it/s]

finished frames 3118200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 519826/1666666 [1:01:57<1:32:11, 207.32it/s]

finished frames 3118800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 519932/1666666 [1:01:58<1:31:43, 208.36it/s]

finished frames 3119400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 520037/1666666 [1:01:58<1:35:41, 199.72it/s]

finished frames 3120000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 520123/1666666 [1:01:59<1:32:07, 207.42it/s]

finished frames 3120600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 520232/1666666 [1:01:59<1:30:57, 210.07it/s]

finished frames 3121200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 520341/1666666 [1:02:00<1:33:35, 204.15it/s]

finished frames 3121800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 520427/1666666 [1:02:00<1:35:24, 200.23it/s]

finished frames 3122400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 520536/1666666 [1:02:01<1:31:19, 209.17it/s]

finished frames 3123000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 520623/1666666 [1:02:01<1:30:20, 211.42it/s]

finished frames 3123600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███       | 520733/1666666 [1:02:02<1:30:12, 211.72it/s]

finished frames 3124200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███▏      | 520843/1666666 [1:02:02<1:30:07, 211.88it/s]

finished frames 3124800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███▏      | 520931/1666666 [1:02:02<1:30:28, 211.06it/s]

finished frames 3125400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███▏      | 521041/1666666 [1:02:03<1:31:38, 208.36it/s]

finished frames 3126000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███▏      | 521128/1666666 [1:02:03<1:29:17, 213.83it/s]

finished frames 3126600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███▏      | 521238/1666666 [1:02:04<1:27:56, 217.08it/s]

finished frames 3127200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███▏      | 521326/1666666 [1:02:04<1:28:04, 216.73it/s]

finished frames 3127800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███▏      | 521436/1666666 [1:02:05<1:28:18, 216.15it/s]

finished frames 3128400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███▏      | 521524/1666666 [1:02:05<1:28:07, 216.57it/s]

finished frames 3129000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███▏      | 521634/1666666 [1:02:06<1:27:57, 216.96it/s]

finished frames 3129600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███▏      | 521744/1666666 [1:02:06<1:27:57, 216.96it/s]

finished frames 3130200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███▏      | 521832/1666666 [1:02:07<1:28:06, 216.58it/s]

finished frames 3130800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███▏      | 521942/1666666 [1:02:07<1:27:51, 217.13it/s]

finished frames 3131400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███▏      | 522030/1666666 [1:02:08<1:30:23, 211.05it/s]

finished frames 3132000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███▏      | 522140/1666666 [1:02:08<1:28:40, 215.12it/s]

finished frames 3132600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███▏      | 522228/1666666 [1:02:09<1:28:43, 214.98it/s]

finished frames 3133200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███▏      | 522338/1666666 [1:02:09<1:28:59, 214.33it/s]

finished frames 3133800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███▏      | 522426/1666666 [1:02:09<1:29:03, 214.13it/s]

finished frames 3134400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███▏      | 522536/1666666 [1:02:10<1:28:31, 215.40it/s]

finished frames 3135000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███▏      | 522624/1666666 [1:02:10<1:28:45, 214.82it/s]

finished frames 3135600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███▏      | 522733/1666666 [1:02:11<1:30:25, 210.85it/s]

finished frames 3136200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███▏      | 522843/1666666 [1:02:11<1:32:00, 207.20it/s]

finished frames 3136800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███▏      | 522930/1666666 [1:02:12<1:29:47, 212.28it/s]

finished frames 3137400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███▏      | 523040/1666666 [1:02:12<1:31:13, 208.92it/s]

finished frames 3138000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███▏      | 523127/1666666 [1:02:13<1:29:57, 211.86it/s]

finished frames 3138600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███▏      | 523237/1666666 [1:02:13<1:29:32, 212.83it/s]

finished frames 3139200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███▏      | 523325/1666666 [1:02:14<1:29:14, 213.51it/s]

finished frames 3139800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███▏      | 523435/1666666 [1:02:14<1:29:03, 213.94it/s]

finished frames 3140400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███▏      | 523523/1666666 [1:02:15<1:29:10, 213.65it/s]

finished frames 3141000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███▏      | 523633/1666666 [1:02:15<1:28:51, 214.39it/s]

finished frames 3141600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███▏      | 523743/1666666 [1:02:16<1:29:16, 213.35it/s]

finished frames 3142200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███▏      | 523831/1666666 [1:02:16<1:29:15, 213.39it/s]

finished frames 3142800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███▏      | 523941/1666666 [1:02:17<1:29:02, 213.88it/s]

finished frames 3143400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███▏      | 524029/1666666 [1:02:17<1:31:01, 209.23it/s]

finished frames 3144000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███▏      | 524139/1666666 [1:02:18<1:29:46, 212.12it/s]

finished frames 3144600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███▏      | 524227/1666666 [1:02:18<1:29:13, 213.39it/s]

finished frames 3145200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███▏      | 524337/1666666 [1:02:18<1:29:10, 213.50it/s]

finished frames 3145800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███▏      | 524425/1666666 [1:02:19<1:29:17, 213.19it/s]

finished frames 3146400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███▏      | 524535/1666666 [1:02:19<1:28:59, 213.89it/s]

finished frames 3147000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███▏      | 524623/1666666 [1:02:20<1:28:57, 213.98it/s]

finished frames 3147600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███▏      | 524733/1666666 [1:02:20<1:28:55, 214.04it/s]

finished frames 3148200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███▏      | 524843/1666666 [1:02:21<1:28:55, 213.99it/s]

finished frames 3148800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 31%|███▏      | 524931/1666666 [1:02:21<1:29:05, 213.58it/s]

finished frames 3149400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 525019/1666666 [1:02:22<1:31:46, 207.32it/s]

finished frames 3150000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 525127/1666666 [1:02:22<1:30:53, 209.32it/s]

finished frames 3150600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 525237/1666666 [1:02:23<1:31:23, 208.16it/s]

finished frames 3151200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 525324/1666666 [1:02:23<1:29:34, 212.37it/s]

finished frames 3151800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 525434/1666666 [1:02:24<1:28:31, 214.86it/s]

finished frames 3152400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 525522/1666666 [1:02:24<1:29:34, 212.31it/s]

finished frames 3153000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 525632/1666666 [1:02:25<1:30:03, 211.15it/s]

finished frames 3153600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 525742/1666666 [1:02:25<1:29:44, 211.88it/s]

finished frames 3154200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 525830/1666666 [1:02:26<1:29:53, 211.53it/s]

finished frames 3154800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 525940/1666666 [1:02:26<1:29:48, 211.69it/s]

finished frames 3155400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 526027/1666666 [1:02:27<1:32:11, 206.22it/s]

finished frames 3156000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 526135/1666666 [1:02:27<1:30:09, 210.82it/s]

finished frames 3156600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 526223/1666666 [1:02:27<1:29:59, 211.21it/s]

finished frames 3157200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 526333/1666666 [1:02:28<1:29:43, 211.81it/s]

finished frames 3157800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 526443/1666666 [1:02:28<1:29:51, 211.48it/s]

finished frames 3158400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 526531/1666666 [1:02:29<1:29:28, 212.38it/s]

finished frames 3159000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 526641/1666666 [1:02:29<1:29:45, 211.68it/s]

finished frames 3159600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 526729/1666666 [1:02:30<1:30:00, 211.06it/s]

finished frames 3160200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 526839/1666666 [1:02:30<1:30:02, 211.00it/s]

finished frames 3160800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 526927/1666666 [1:02:31<1:30:06, 210.81it/s]

finished frames 3161400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 527036/1666666 [1:02:31<1:32:29, 205.35it/s]

finished frames 3162000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 527142/1666666 [1:02:32<1:30:59, 208.73it/s]

finished frames 3162600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 527226/1666666 [1:02:32<1:30:52, 208.97it/s]

finished frames 3163200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 527332/1666666 [1:02:33<1:30:41, 209.37it/s]

finished frames 3163800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 527438/1666666 [1:02:33<1:30:35, 209.60it/s]

finished frames 3164400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 527523/1666666 [1:02:34<1:38:40, 192.42it/s]

finished frames 3165000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 527629/1666666 [1:02:34<1:32:00, 206.31it/s]

finished frames 3165600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 527737/1666666 [1:02:35<1:30:23, 209.98it/s]

finished frames 3166200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 527824/1666666 [1:02:35<1:30:13, 210.38it/s]

finished frames 3166800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 527931/1666666 [1:02:36<1:30:47, 209.02it/s]

finished frames 3167400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 528037/1666666 [1:02:36<1:32:41, 204.73it/s]

finished frames 3168000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 528123/1666666 [1:02:37<1:31:22, 207.68it/s]

finished frames 3168600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 528231/1666666 [1:02:37<1:30:40, 209.26it/s]

finished frames 3169200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 528338/1666666 [1:02:38<1:30:32, 209.53it/s]

finished frames 3169800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 528423/1666666 [1:02:38<1:30:41, 209.19it/s]

finished frames 3170400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 528530/1666666 [1:02:39<1:30:46, 208.98it/s]

finished frames 3171000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 528635/1666666 [1:02:39<1:31:05, 208.20it/s]

finished frames 3171600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 528741/1666666 [1:02:40<1:31:04, 208.25it/s]

finished frames 3172200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 528826/1666666 [1:02:40<1:31:30, 207.25it/s]

finished frames 3172800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 528933/1666666 [1:02:40<1:30:37, 209.22it/s]

finished frames 3173400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 529039/1666666 [1:02:41<1:32:41, 204.54it/s]

finished frames 3174000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 529123/1666666 [1:02:41<1:31:41, 206.78it/s]

finished frames 3174600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 529230/1666666 [1:02:42<1:30:47, 208.81it/s]

finished frames 3175200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 529336/1666666 [1:02:42<1:30:56, 208.44it/s]

finished frames 3175800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 529422/1666666 [1:02:43<1:30:41, 208.99it/s]

finished frames 3176400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 529530/1666666 [1:02:43<1:30:30, 209.39it/s]

finished frames 3177000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 529637/1666666 [1:02:44<1:30:32, 209.31it/s]

finished frames 3177600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 529722/1666666 [1:02:44<1:30:51, 208.56it/s]

finished frames 3178200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 529828/1666666 [1:02:45<1:32:41, 204.41it/s]

finished frames 3178800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 529935/1666666 [1:02:45<1:33:29, 202.64it/s]

finished frames 3179400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 530021/1666666 [1:02:46<1:33:34, 202.47it/s]

finished frames 3180000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 530129/1666666 [1:02:46<1:30:15, 209.86it/s]

finished frames 3180600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 530239/1666666 [1:02:47<1:29:19, 212.04it/s]

finished frames 3181200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 530327/1666666 [1:02:47<1:28:36, 213.75it/s]

finished frames 3181800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 530437/1666666 [1:02:48<1:28:53, 213.04it/s]

finished frames 3182400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 530525/1666666 [1:02:48<1:29:03, 212.64it/s]

finished frames 3183000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 530635/1666666 [1:02:49<1:29:39, 211.16it/s]

finished frames 3183600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 530742/1666666 [1:02:49<1:30:23, 209.45it/s]

finished frames 3184200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 530827/1666666 [1:02:50<1:30:54, 208.25it/s]

finished frames 3184800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 530932/1666666 [1:02:50<1:31:04, 207.82it/s]

finished frames 3185400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 531037/1666666 [1:02:51<1:32:50, 203.87it/s]

finished frames 3186000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 531142/1666666 [1:02:51<1:31:28, 206.90it/s]

finished frames 3186600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 531226/1666666 [1:02:52<1:32:41, 204.16it/s]

finished frames 3187200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 531331/1666666 [1:02:52<1:31:57, 205.77it/s]

finished frames 3187800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 531438/1666666 [1:02:53<1:30:41, 208.61it/s]

finished frames 3188400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 531523/1666666 [1:02:53<1:30:21, 209.38it/s]

finished frames 3189000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 531629/1666666 [1:02:53<1:30:18, 209.47it/s]

finished frames 3189600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 531737/1666666 [1:02:54<1:29:51, 210.51it/s]

finished frames 3190200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 531824/1666666 [1:02:54<1:29:51, 210.47it/s]

finished frames 3190800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 531932/1666666 [1:02:55<1:30:19, 209.39it/s]

finished frames 3191400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 532040/1666666 [1:02:55<1:31:19, 207.08it/s]

finished frames 3192000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 532126/1666666 [1:02:56<1:33:26, 202.34it/s]

finished frames 3192600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 532235/1666666 [1:02:56<1:29:27, 211.36it/s]

finished frames 3193200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 532323/1666666 [1:02:57<1:28:44, 213.04it/s]

finished frames 3193800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 532433/1666666 [1:02:57<1:27:58, 214.88it/s]

finished frames 3194400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 532543/1666666 [1:02:58<1:27:56, 214.93it/s]

finished frames 3195000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 532631/1666666 [1:02:58<1:28:05, 214.56it/s]

finished frames 3195600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 532741/1666666 [1:02:59<1:27:50, 215.14it/s]

finished frames 3196200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 532829/1666666 [1:02:59<1:27:53, 215.01it/s]

finished frames 3196800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 532939/1666666 [1:03:00<1:27:45, 215.33it/s]

finished frames 3197400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 533026/1666666 [1:03:00<1:30:43, 208.24it/s]

finished frames 3198000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 533133/1666666 [1:03:01<1:30:07, 209.62it/s]

finished frames 3198600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 533241/1666666 [1:03:01<1:29:56, 210.02it/s]

finished frames 3199200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 533326/1666666 [1:03:02<1:30:28, 208.76it/s]

finished frames 3199800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 533432/1666666 [1:03:02<1:30:22, 208.98it/s]

finished frames 3200400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 533537/1666666 [1:03:03<1:30:34, 208.51it/s]

finished frames 3201000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 533642/1666666 [1:03:03<1:30:25, 208.83it/s]

finished frames 3201600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 533726/1666666 [1:03:03<1:30:29, 208.67it/s]

finished frames 3202200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 533831/1666666 [1:03:04<1:30:29, 208.66it/s]

finished frames 3202800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 533936/1666666 [1:03:04<1:30:26, 208.73it/s]

finished frames 3203400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 534020/1666666 [1:03:05<1:33:23, 202.12it/s]

finished frames 3204000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 534125/1666666 [1:03:05<1:31:19, 206.69it/s]

finished frames 3204600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 534230/1666666 [1:03:06<1:31:06, 207.16it/s]

finished frames 3205200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 534335/1666666 [1:03:06<1:30:49, 207.80it/s]

finished frames 3205800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 534419/1666666 [1:03:07<1:30:40, 208.10it/s]

finished frames 3206400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 534524/1666666 [1:03:07<1:32:16, 204.48it/s]

finished frames 3207000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 534630/1666666 [1:03:08<1:33:30, 201.77it/s]

finished frames 3207600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 534735/1666666 [1:03:08<1:31:00, 207.30it/s]

finished frames 3208200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 534841/1666666 [1:03:09<1:30:15, 209.00it/s]

finished frames 3208800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 534925/1666666 [1:03:09<1:30:18, 208.86it/s]

finished frames 3209400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 535031/1666666 [1:03:10<1:32:25, 204.07it/s]

finished frames 3210000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 535137/1666666 [1:03:10<1:30:34, 208.22it/s]

finished frames 3210600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 535242/1666666 [1:03:11<1:30:29, 208.38it/s]

finished frames 3211200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 535326/1666666 [1:03:11<1:30:22, 208.62it/s]

finished frames 3211800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 535432/1666666 [1:03:12<1:30:18, 208.78it/s]

finished frames 3212400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 535537/1666666 [1:03:12<1:30:28, 208.36it/s]

finished frames 3213000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 535642/1666666 [1:03:13<1:30:19, 208.68it/s]

finished frames 3213600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 535727/1666666 [1:03:13<1:30:10, 209.02it/s]

finished frames 3214200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 535836/1666666 [1:03:14<1:28:34, 212.76it/s]

finished frames 3214800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 535924/1666666 [1:03:14<1:27:51, 214.51it/s]

finished frames 3215400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 536034/1666666 [1:03:15<1:30:18, 208.66it/s]

finished frames 3216000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 536144/1666666 [1:03:15<1:27:51, 214.47it/s]

finished frames 3216600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 536232/1666666 [1:03:16<1:27:31, 215.26it/s]

finished frames 3217200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 536342/1666666 [1:03:16<1:27:50, 214.45it/s]

finished frames 3217800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 536430/1666666 [1:03:16<1:27:55, 214.26it/s]

finished frames 3218400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 536540/1666666 [1:03:17<1:27:37, 214.95it/s]

finished frames 3219000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 536628/1666666 [1:03:17<1:27:36, 214.99it/s]

finished frames 3219600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 536738/1666666 [1:03:18<1:27:50, 214.40it/s]

finished frames 3220200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 536826/1666666 [1:03:18<1:35:48, 196.54it/s]

finished frames 3220800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 536935/1666666 [1:03:19<1:29:12, 211.09it/s]

finished frames 3221400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 537022/1666666 [1:03:19<1:30:43, 207.52it/s]

finished frames 3222000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 537132/1666666 [1:03:20<1:28:10, 213.51it/s]

finished frames 3222600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 537242/1666666 [1:03:20<1:27:20, 215.54it/s]

finished frames 3223200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 537330/1666666 [1:03:21<1:27:23, 215.39it/s]

finished frames 3223800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 537440/1666666 [1:03:21<1:27:21, 215.44it/s]

finished frames 3224400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 537528/1666666 [1:03:22<1:27:26, 215.21it/s]

finished frames 3225000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 537638/1666666 [1:03:22<1:27:49, 214.24it/s]

finished frames 3225600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 537726/1666666 [1:03:23<1:27:33, 214.91it/s]

finished frames 3226200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 537836/1666666 [1:03:23<1:27:12, 215.73it/s]

finished frames 3226800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 537924/1666666 [1:03:23<1:27:24, 215.22it/s]

finished frames 3227400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 538034/1666666 [1:03:24<1:30:34, 207.67it/s]

finished frames 3228000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 538144/1666666 [1:03:25<1:27:31, 214.89it/s]

finished frames 3228600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 538232/1666666 [1:03:25<1:26:58, 216.22it/s]

finished frames 3229200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 538342/1666666 [1:03:25<1:26:58, 216.20it/s]

finished frames 3229800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 538430/1666666 [1:03:26<1:26:54, 216.39it/s]

finished frames 3230400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 538540/1666666 [1:03:26<1:26:47, 216.62it/s]

finished frames 3231000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 538628/1666666 [1:03:27<1:26:53, 216.35it/s]

finished frames 3231600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 538738/1666666 [1:03:27<1:26:44, 216.70it/s]

finished frames 3232200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 538826/1666666 [1:03:28<1:26:49, 216.49it/s]

finished frames 3232800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 538936/1666666 [1:03:28<1:26:47, 216.54it/s]

finished frames 3233400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 539024/1666666 [1:03:29<1:28:46, 211.71it/s]

finished frames 3234000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 539134/1666666 [1:03:29<1:27:11, 215.51it/s]

finished frames 3234600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 539244/1666666 [1:03:30<1:26:52, 216.28it/s]

finished frames 3235200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 539332/1666666 [1:03:30<1:26:38, 216.87it/s]

finished frames 3235800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 539442/1666666 [1:03:31<1:27:08, 215.58it/s]

finished frames 3236400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 539530/1666666 [1:03:31<1:26:51, 216.29it/s]

finished frames 3237000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 539640/1666666 [1:03:31<1:26:41, 216.65it/s]

finished frames 3237600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 539728/1666666 [1:03:32<1:26:59, 215.89it/s]

finished frames 3238200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 539838/1666666 [1:03:32<1:26:43, 216.57it/s]

finished frames 3238800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 539926/1666666 [1:03:33<1:27:10, 215.43it/s]

finished frames 3239400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 540036/1666666 [1:03:33<1:28:53, 211.24it/s]

finished frames 3240000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 540124/1666666 [1:03:34<1:27:29, 214.59it/s]

finished frames 3240600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 540234/1666666 [1:03:34<1:26:59, 215.80it/s]

finished frames 3241200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 540344/1666666 [1:03:35<1:26:40, 216.57it/s]

finished frames 3241800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 540432/1666666 [1:03:35<1:26:38, 216.64it/s]

finished frames 3242400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 540542/1666666 [1:03:36<1:26:53, 215.98it/s]

finished frames 3243000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 540630/1666666 [1:03:36<1:26:40, 216.54it/s]

finished frames 3243600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 540740/1666666 [1:03:37<1:26:46, 216.25it/s]

finished frames 3244200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 540828/1666666 [1:03:37<1:26:44, 216.34it/s]

finished frames 3244800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 540938/1666666 [1:03:38<1:26:43, 216.35it/s]

finished frames 3245400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 541026/1666666 [1:03:38<1:28:27, 212.09it/s]

finished frames 3246000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 541136/1666666 [1:03:38<1:27:04, 215.41it/s]

finished frames 3246600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 541224/1666666 [1:03:39<1:26:51, 215.97it/s]

finished frames 3247200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 541334/1666666 [1:03:39<1:26:50, 215.96it/s]

finished frames 3247800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 541444/1666666 [1:03:40<1:26:50, 215.97it/s]

finished frames 3248400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 541532/1666666 [1:03:40<1:26:56, 215.67it/s]

finished frames 3249000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 32%|███▏      | 541620/1666666 [1:03:41<1:26:59, 215.54it/s]

finished frames 3249600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 541730/1666666 [1:03:41<1:28:41, 211.40it/s]

finished frames 3250200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 541840/1666666 [1:03:42<1:29:39, 209.10it/s]

finished frames 3250800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 541928/1666666 [1:03:42<1:27:30, 214.23it/s]

finished frames 3251400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 542038/1666666 [1:03:43<1:28:43, 211.24it/s]

finished frames 3252000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 542126/1666666 [1:03:43<1:27:14, 214.83it/s]

finished frames 3252600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 542236/1666666 [1:03:44<1:26:41, 216.19it/s]

finished frames 3253200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 542324/1666666 [1:03:44<1:26:40, 216.19it/s]

finished frames 3253800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 542434/1666666 [1:03:45<1:26:37, 216.32it/s]

finished frames 3254400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 542544/1666666 [1:03:45<1:26:41, 216.13it/s]

finished frames 3255000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 542632/1666666 [1:03:45<1:26:35, 216.34it/s]

finished frames 3255600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 542742/1666666 [1:03:46<1:26:55, 215.50it/s]

finished frames 3256200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 542830/1666666 [1:03:46<1:26:44, 215.95it/s]

finished frames 3256800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 542940/1666666 [1:03:47<1:27:14, 214.69it/s]

finished frames 3257400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 543028/1666666 [1:03:47<1:28:52, 210.73it/s]

finished frames 3258000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 543138/1666666 [1:03:48<1:27:08, 214.88it/s]

finished frames 3258600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 543226/1666666 [1:03:48<1:26:46, 215.76it/s]

finished frames 3259200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 543336/1666666 [1:03:49<1:26:45, 215.79it/s]

finished frames 3259800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 543425/1666666 [1:03:49<1:26:13, 217.10it/s]

finished frames 3260400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 543535/1666666 [1:03:50<1:26:44, 215.78it/s]

finished frames 3261000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 543623/1666666 [1:03:50<1:26:41, 215.91it/s]

finished frames 3261600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 543733/1666666 [1:03:51<1:26:35, 216.15it/s]

finished frames 3262200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 543843/1666666 [1:03:51<1:26:39, 215.94it/s]

finished frames 3262800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 543931/1666666 [1:03:51<1:26:41, 215.85it/s]

finished frames 3263400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 544041/1666666 [1:03:52<1:28:53, 210.49it/s]

finished frames 3264000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 544129/1666666 [1:03:52<1:27:38, 213.48it/s]

finished frames 3264600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 544239/1666666 [1:03:53<1:29:55, 208.03it/s]

finished frames 3265200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 544325/1666666 [1:03:53<1:28:23, 211.63it/s]

finished frames 3265800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 544435/1666666 [1:03:54<1:28:02, 212.46it/s]

finished frames 3266400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 544523/1666666 [1:03:54<1:27:27, 213.85it/s]

finished frames 3267000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 544633/1666666 [1:03:55<1:29:32, 208.85it/s]

finished frames 3267600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 544742/1666666 [1:03:55<1:27:51, 212.83it/s]

finished frames 3268200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 544830/1666666 [1:03:56<1:28:02, 212.38it/s]

finished frames 3268800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 544938/1666666 [1:03:56<1:31:23, 204.57it/s]

finished frames 3269400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 545021/1666666 [1:03:57<1:34:45, 197.28it/s]

finished frames 3270000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 545125/1666666 [1:03:57<1:30:52, 205.68it/s]

finished frames 3270600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 545230/1666666 [1:03:58<1:30:03, 207.52it/s]

finished frames 3271200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 545335/1666666 [1:03:58<1:32:11, 202.71it/s]

finished frames 3271800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 545440/1666666 [1:03:59<1:30:05, 207.43it/s]

finished frames 3272400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 545524/1666666 [1:03:59<1:30:06, 207.38it/s]

finished frames 3273000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 545629/1666666 [1:04:00<1:30:02, 207.51it/s]

finished frames 3273600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 545735/1666666 [1:04:00<1:29:48, 208.02it/s]

finished frames 3274200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 545840/1666666 [1:04:01<1:29:52, 207.86it/s]

finished frames 3274800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 545924/1666666 [1:04:01<1:29:59, 207.56it/s]

finished frames 3275400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 546029/1666666 [1:04:02<1:32:03, 202.87it/s]

finished frames 3276000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 546135/1666666 [1:04:02<1:30:04, 207.34it/s]

finished frames 3276600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 546240/1666666 [1:04:03<1:29:58, 207.56it/s]

finished frames 3277200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 546325/1666666 [1:04:03<1:29:32, 208.54it/s]

finished frames 3277800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 546430/1666666 [1:04:04<1:33:15, 200.19it/s]

finished frames 3278400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 546536/1666666 [1:04:04<1:30:01, 207.36it/s]

finished frames 3279000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 546642/1666666 [1:04:05<1:29:15, 209.15it/s]

finished frames 3279600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 546726/1666666 [1:04:05<1:29:22, 208.84it/s]

finished frames 3280200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 546831/1666666 [1:04:05<1:29:37, 208.24it/s]

finished frames 3280800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 546938/1666666 [1:04:06<1:29:08, 209.36it/s]

finished frames 3281400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 547023/1666666 [1:04:06<1:31:14, 204.51it/s]

finished frames 3282000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 547130/1666666 [1:04:07<1:29:18, 208.93it/s]

finished frames 3282600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 547236/1666666 [1:04:07<1:29:16, 208.98it/s]

finished frames 3283200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 547343/1666666 [1:04:08<1:28:57, 209.71it/s]

finished frames 3283800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 547427/1666666 [1:04:08<1:29:15, 208.98it/s]

finished frames 3284400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 547533/1666666 [1:04:09<1:29:06, 209.33it/s]

finished frames 3285000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 547638/1666666 [1:04:09<1:29:12, 209.06it/s]

finished frames 3285600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 547723/1666666 [1:04:10<1:29:02, 209.42it/s]

finished frames 3286200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 547832/1666666 [1:04:10<1:28:32, 210.60it/s]

finished frames 3286800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 547942/1666666 [1:04:11<1:28:16, 211.20it/s]

finished frames 3287400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 548027/1666666 [1:04:11<1:30:42, 205.56it/s]

finished frames 3288000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 548132/1666666 [1:04:12<1:29:50, 207.50it/s]

finished frames 3288600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 548237/1666666 [1:04:12<1:29:41, 207.83it/s]

finished frames 3289200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 548342/1666666 [1:04:13<1:29:32, 208.14it/s]

finished frames 3289800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 548426/1666666 [1:04:13<1:29:19, 208.65it/s]

finished frames 3290400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 548532/1666666 [1:04:14<1:29:13, 208.86it/s]

finished frames 3291000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 548637/1666666 [1:04:14<1:29:36, 207.94it/s]

finished frames 3291600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 548742/1666666 [1:04:15<1:29:26, 208.32it/s]

finished frames 3292200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 548827/1666666 [1:04:15<1:29:25, 208.32it/s]

finished frames 3292800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 548934/1666666 [1:04:16<1:31:35, 203.38it/s]

finished frames 3293400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 549040/1666666 [1:04:16<1:31:34, 203.42it/s]

finished frames 3294000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 549125/1666666 [1:04:17<1:29:38, 207.77it/s]

finished frames 3294600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 549231/1666666 [1:04:17<1:28:55, 209.42it/s]

finished frames 3295200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 549336/1666666 [1:04:18<1:29:03, 209.10it/s]

finished frames 3295800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 549422/1666666 [1:04:18<1:29:00, 209.20it/s]

finished frames 3296400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 549530/1666666 [1:04:18<1:28:53, 209.46it/s]

finished frames 3297000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 549637/1666666 [1:04:19<1:28:42, 209.89it/s]

finished frames 3297600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 549722/1666666 [1:04:19<1:28:56, 209.29it/s]

finished frames 3298200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 549828/1666666 [1:04:20<1:29:28, 208.05it/s]

finished frames 3298800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 549933/1666666 [1:04:20<1:29:18, 208.41it/s]

finished frames 3299400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 550038/1666666 [1:04:21<1:31:22, 203.66it/s]

finished frames 3300000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 550122/1666666 [1:04:21<1:29:47, 207.24it/s]

finished frames 3300600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 550229/1666666 [1:04:22<1:28:57, 209.18it/s]

finished frames 3301200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 550335/1666666 [1:04:22<1:29:00, 209.05it/s]

finished frames 3301800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 550440/1666666 [1:04:23<1:29:21, 208.20it/s]

finished frames 3302400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 550524/1666666 [1:04:23<1:29:27, 207.95it/s]

finished frames 3303000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 550630/1666666 [1:04:24<1:29:16, 208.36it/s]

finished frames 3303600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 550737/1666666 [1:04:24<1:28:57, 209.08it/s]

finished frames 3304200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 550843/1666666 [1:04:25<1:29:00, 208.94it/s]

finished frames 3304800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 550929/1666666 [1:04:25<1:28:47, 209.41it/s]

finished frames 3305400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 551034/1666666 [1:04:26<1:31:17, 203.67it/s]

finished frames 3306000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 551139/1666666 [1:04:26<1:32:18, 201.40it/s]

finished frames 3306600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 551223/1666666 [1:04:27<1:30:11, 206.13it/s]

finished frames 3307200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 551329/1666666 [1:04:27<1:29:13, 208.32it/s]

finished frames 3307800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 551434/1666666 [1:04:28<1:29:14, 208.28it/s]

finished frames 3308400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 551539/1666666 [1:04:28<1:29:17, 208.16it/s]

finished frames 3309000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 551623/1666666 [1:04:29<1:29:24, 207.87it/s]

finished frames 3309600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 551729/1666666 [1:04:29<1:29:13, 208.25it/s]

finished frames 3310200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 551838/1666666 [1:04:30<1:27:37, 212.04it/s]

finished frames 3310800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 551926/1666666 [1:04:30<1:27:28, 212.39it/s]

finished frames 3311400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 552036/1666666 [1:04:31<1:28:33, 209.78it/s]

finished frames 3312000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 552123/1666666 [1:04:31<1:27:55, 211.25it/s]

finished frames 3312600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 552233/1666666 [1:04:32<1:26:57, 213.58it/s]

finished frames 3313200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 552343/1666666 [1:04:32<1:26:32, 214.62it/s]

finished frames 3313800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 552431/1666666 [1:04:32<1:26:45, 214.07it/s]

finished frames 3314400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 552541/1666666 [1:04:33<1:26:37, 214.35it/s]

finished frames 3315000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 552629/1666666 [1:04:33<1:26:33, 214.51it/s]

finished frames 3315600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 552739/1666666 [1:04:34<1:26:24, 214.85it/s]

finished frames 3316200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 552827/1666666 [1:04:34<1:26:39, 214.23it/s]

finished frames 3316800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 552937/1666666 [1:04:35<1:26:28, 214.64it/s]

finished frames 3317400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 553025/1666666 [1:04:35<1:28:03, 210.77it/s]

finished frames 3318000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 553135/1666666 [1:04:36<1:26:43, 214.02it/s]

finished frames 3318600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 553223/1666666 [1:04:36<1:26:38, 214.20it/s]

finished frames 3319200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 553333/1666666 [1:04:37<1:26:30, 214.49it/s]

finished frames 3319800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 553443/1666666 [1:04:37<1:26:11, 215.26it/s]

finished frames 3320400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 553531/1666666 [1:04:38<1:26:31, 214.40it/s]

finished frames 3321000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 553641/1666666 [1:04:38<1:29:13, 207.90it/s]

finished frames 3321600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 553728/1666666 [1:04:39<1:27:16, 212.55it/s]

finished frames 3322200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 553838/1666666 [1:04:39<1:26:47, 213.71it/s]

finished frames 3322800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 553926/1666666 [1:04:39<1:26:44, 213.81it/s]

finished frames 3323400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 554036/1666666 [1:04:40<1:28:15, 210.13it/s]

finished frames 3324000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 554124/1666666 [1:04:40<1:26:34, 214.16it/s]

finished frames 3324600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 554234/1666666 [1:04:41<1:26:06, 215.30it/s]

finished frames 3325200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 554344/1666666 [1:04:41<1:25:56, 215.72it/s]

finished frames 3325800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 554432/1666666 [1:04:42<1:26:05, 215.31it/s]

finished frames 3326400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 554542/1666666 [1:04:42<1:26:00, 215.49it/s]

finished frames 3327000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 554630/1666666 [1:04:43<1:25:57, 215.63it/s]

finished frames 3327600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 554740/1666666 [1:04:43<1:26:01, 215.43it/s]

finished frames 3328200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 554828/1666666 [1:04:44<1:25:58, 215.54it/s]

finished frames 3328800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 554938/1666666 [1:04:44<1:26:03, 215.30it/s]

finished frames 3329400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 555026/1666666 [1:04:45<1:28:06, 210.29it/s]

finished frames 3330000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 555136/1666666 [1:04:45<1:26:26, 214.32it/s]

finished frames 3330600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 555224/1666666 [1:04:46<1:26:08, 215.03it/s]

finished frames 3331200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 555334/1666666 [1:04:46<1:26:00, 215.34it/s]

finished frames 3331800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 555422/1666666 [1:04:46<1:26:07, 215.06it/s]

finished frames 3332400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 555531/1666666 [1:04:47<1:28:44, 208.70it/s]

finished frames 3333000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 555639/1666666 [1:04:47<1:27:59, 210.46it/s]

finished frames 3333600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 555727/1666666 [1:04:48<1:26:53, 213.09it/s]

finished frames 3334200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 555837/1666666 [1:04:48<1:26:14, 214.68it/s]

finished frames 3334800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 555925/1666666 [1:04:49<1:28:57, 208.08it/s]

finished frames 3335400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 556035/1666666 [1:04:49<1:28:37, 208.85it/s]

finished frames 3336000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 556144/1666666 [1:04:50<1:26:33, 213.83it/s]

finished frames 3336600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 556232/1666666 [1:04:50<1:26:17, 214.49it/s]

finished frames 3337200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 556342/1666666 [1:04:51<1:26:18, 214.42it/s]

finished frames 3337800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 556430/1666666 [1:04:51<1:26:18, 214.41it/s]

finished frames 3338400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 556540/1666666 [1:04:52<1:26:02, 215.02it/s]

finished frames 3339000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 556628/1666666 [1:04:52<1:25:51, 215.47it/s]

finished frames 3339600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 556738/1666666 [1:04:53<1:25:53, 215.37it/s]

finished frames 3340200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 556826/1666666 [1:04:53<1:25:30, 216.33it/s]

finished frames 3340800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 556936/1666666 [1:04:54<1:25:59, 215.07it/s]

finished frames 3341400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 557024/1666666 [1:04:54<1:27:44, 210.79it/s]

finished frames 3342000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 557134/1666666 [1:04:55<1:26:15, 214.36it/s]

finished frames 3342600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 557244/1666666 [1:04:55<1:26:45, 213.11it/s]

finished frames 3343200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 557332/1666666 [1:04:55<1:26:41, 213.25it/s]

finished frames 3343800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 557442/1666666 [1:04:56<1:26:30, 213.68it/s]

finished frames 3344400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 557530/1666666 [1:04:56<1:26:26, 213.83it/s]

finished frames 3345000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 557640/1666666 [1:04:57<1:26:40, 213.26it/s]

finished frames 3345600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 557728/1666666 [1:04:57<1:26:50, 212.82it/s]

finished frames 3346200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 557838/1666666 [1:04:58<1:26:48, 212.88it/s]

finished frames 3346800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 557926/1666666 [1:04:58<1:26:44, 213.02it/s]

finished frames 3347400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 558036/1666666 [1:04:59<1:28:11, 209.50it/s]

finished frames 3348000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 558124/1666666 [1:04:59<1:26:31, 213.51it/s]

finished frames 3348600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 558234/1666666 [1:05:00<1:26:31, 213.52it/s]

finished frames 3349200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 33%|███▎      | 558322/1666666 [1:05:00<1:29:36, 206.16it/s]

finished frames 3349800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▎      | 558430/1666666 [1:05:01<1:30:39, 203.73it/s]

finished frames 3350400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▎      | 558538/1666666 [1:05:01<1:28:08, 209.55it/s]

finished frames 3351000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▎      | 558625/1666666 [1:05:02<1:27:42, 210.55it/s]

finished frames 3351600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▎      | 558735/1666666 [1:05:02<1:27:42, 210.52it/s]

finished frames 3352200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▎      | 558823/1666666 [1:05:03<1:27:28, 211.06it/s]

finished frames 3352800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▎      | 558932/1666666 [1:05:03<1:27:33, 210.87it/s]

finished frames 3353400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▎      | 559020/1666666 [1:05:03<1:30:04, 204.94it/s]

finished frames 3354000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▎      | 559129/1666666 [1:05:04<1:28:00, 209.73it/s]

finished frames 3354600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▎      | 559237/1666666 [1:05:05<1:27:24, 211.14it/s]

finished frames 3355200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▎      | 559325/1666666 [1:05:05<1:27:27, 211.04it/s]

finished frames 3355800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▎      | 559435/1666666 [1:05:05<1:27:31, 210.84it/s]

finished frames 3356400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▎      | 559523/1666666 [1:05:06<1:27:45, 210.24it/s]

finished frames 3357000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▎      | 559633/1666666 [1:05:06<1:27:37, 210.54it/s]

finished frames 3357600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▎      | 559743/1666666 [1:05:07<1:27:25, 211.02it/s]

finished frames 3358200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▎      | 559831/1666666 [1:05:07<1:27:13, 211.49it/s]

finished frames 3358800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▎      | 559941/1666666 [1:05:08<1:27:18, 211.25it/s]

finished frames 3359400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▎      | 560029/1666666 [1:05:08<1:28:52, 207.51it/s]

finished frames 3360000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▎      | 560138/1666666 [1:05:09<1:27:08, 211.62it/s]

finished frames 3360600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▎      | 560226/1666666 [1:05:09<1:26:44, 212.59it/s]

finished frames 3361200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▎      | 560336/1666666 [1:05:10<1:27:15, 211.33it/s]

finished frames 3361800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▎      | 560424/1666666 [1:05:10<1:27:21, 211.05it/s]

finished frames 3362400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▎      | 560533/1666666 [1:05:11<1:27:36, 210.42it/s]

finished frames 3363000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▎      | 560643/1666666 [1:05:11<1:27:13, 211.32it/s]

finished frames 3363600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▎      | 560731/1666666 [1:05:12<1:27:02, 211.75it/s]

finished frames 3364200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▎      | 560841/1666666 [1:05:12<1:26:39, 212.69it/s]

finished frames 3364800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▎      | 560929/1666666 [1:05:13<1:26:59, 211.85it/s]

finished frames 3365400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▎      | 561038/1666666 [1:05:13<1:29:26, 206.02it/s]

finished frames 3366000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▎      | 561125/1666666 [1:05:14<1:28:03, 209.24it/s]

finished frames 3366600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▎      | 561234/1666666 [1:05:14<1:27:24, 210.77it/s]

finished frames 3367200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▎      | 561322/1666666 [1:05:14<1:27:14, 211.18it/s]

finished frames 3367800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▎      | 561432/1666666 [1:05:15<1:26:59, 211.76it/s]

finished frames 3368400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▎      | 561542/1666666 [1:05:15<1:27:08, 211.37it/s]

finished frames 3369000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▎      | 561630/1666666 [1:05:16<1:27:13, 211.13it/s]

finished frames 3369600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▎      | 561740/1666666 [1:05:16<1:27:16, 211.02it/s]

finished frames 3370200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▎      | 561828/1666666 [1:05:17<1:27:16, 211.00it/s]

finished frames 3370800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▎      | 561938/1666666 [1:05:17<1:27:05, 211.43it/s]

finished frames 3371400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▎      | 562025/1666666 [1:05:18<1:29:09, 206.51it/s]

finished frames 3372000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▎      | 562132/1666666 [1:05:18<1:27:28, 210.46it/s]

finished frames 3372600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▎      | 562242/1666666 [1:05:19<1:27:08, 211.24it/s]

finished frames 3373200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▎      | 562330/1666666 [1:05:19<1:27:12, 211.05it/s]

finished frames 3373800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▎      | 562440/1666666 [1:05:20<1:27:14, 210.97it/s]

finished frames 3374400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 562528/1666666 [1:05:20<1:27:07, 211.21it/s]

finished frames 3375000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 562638/1666666 [1:05:21<1:26:59, 211.52it/s]

finished frames 3375600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 562726/1666666 [1:05:21<1:27:00, 211.45it/s]

finished frames 3376200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 562836/1666666 [1:05:22<1:27:11, 210.99it/s]

finished frames 3376800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 562924/1666666 [1:05:22<1:27:13, 210.89it/s]

finished frames 3377400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 563031/1666666 [1:05:23<1:33:10, 197.43it/s]

finished frames 3378000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 563139/1666666 [1:05:23<1:31:37, 200.73it/s]

finished frames 3378600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 563226/1666666 [1:05:24<1:28:02, 208.87it/s]

finished frames 3379200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 563336/1666666 [1:05:24<1:27:11, 210.88it/s]

finished frames 3379800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 563424/1666666 [1:05:25<1:27:09, 210.96it/s]

finished frames 3380400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 563533/1666666 [1:05:25<1:27:06, 211.07it/s]

finished frames 3381000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 563643/1666666 [1:05:26<1:27:25, 210.28it/s]

finished frames 3381600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 563731/1666666 [1:05:26<1:27:11, 210.82it/s]

finished frames 3382200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 563841/1666666 [1:05:26<1:26:54, 211.50it/s]

finished frames 3382800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 563929/1666666 [1:05:27<1:26:50, 211.63it/s]

finished frames 3383400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 564038/1666666 [1:05:27<1:29:10, 206.07it/s]

finished frames 3384000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 564125/1666666 [1:05:28<1:27:50, 209.17it/s]

finished frames 3384600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 564234/1666666 [1:05:28<1:27:29, 210.00it/s]

finished frames 3385200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 564322/1666666 [1:05:29<1:27:05, 210.94it/s]

finished frames 3385800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 564432/1666666 [1:05:29<1:27:15, 210.53it/s]

finished frames 3386400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 564541/1666666 [1:05:30<1:27:00, 211.12it/s]

finished frames 3387000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 564629/1666666 [1:05:30<1:27:04, 210.92it/s]

finished frames 3387600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 564738/1666666 [1:05:31<1:27:25, 210.09it/s]

finished frames 3388200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 564825/1666666 [1:05:31<1:27:29, 209.88it/s]

finished frames 3388800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 564935/1666666 [1:05:32<1:27:10, 210.65it/s]

finished frames 3389400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 565022/1666666 [1:05:32<1:29:11, 205.88it/s]

finished frames 3390000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 565127/1666666 [1:05:33<1:28:01, 208.55it/s]

finished frames 3390600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 565236/1666666 [1:05:33<1:27:06, 210.72it/s]

finished frames 3391200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 565324/1666666 [1:05:34<1:27:12, 210.47it/s]

finished frames 3391800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 565434/1666666 [1:05:34<1:26:27, 212.29it/s]

finished frames 3392400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 565544/1666666 [1:05:35<1:28:40, 206.96it/s]

finished frames 3393000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 565632/1666666 [1:05:35<1:26:12, 212.86it/s]

finished frames 3393600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 565742/1666666 [1:05:36<1:25:45, 213.97it/s]

finished frames 3394200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 565830/1666666 [1:05:36<1:25:39, 214.21it/s]

finished frames 3394800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 565940/1666666 [1:05:37<1:25:44, 213.97it/s]

finished frames 3395400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 566028/1666666 [1:05:37<1:27:06, 210.59it/s]

finished frames 3396000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 566138/1666666 [1:05:37<1:25:56, 213.42it/s]

finished frames 3396600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 566226/1666666 [1:05:38<1:25:36, 214.23it/s]

finished frames 3397200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 566336/1666666 [1:05:38<1:25:50, 213.65it/s]

finished frames 3397800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 566424/1666666 [1:05:39<1:25:38, 214.12it/s]

finished frames 3398400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 566534/1666666 [1:05:39<1:25:32, 214.34it/s]

finished frames 3399000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 566644/1666666 [1:05:40<1:25:28, 214.49it/s]

finished frames 3399600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 566732/1666666 [1:05:40<1:25:27, 214.52it/s]

finished frames 3400200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 566842/1666666 [1:05:41<1:25:18, 214.88it/s]

finished frames 3400800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 566930/1666666 [1:05:41<1:25:16, 214.94it/s]

finished frames 3401400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 567040/1666666 [1:05:42<1:26:48, 211.13it/s]

finished frames 3402000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 567128/1666666 [1:05:42<1:25:38, 214.00it/s]

finished frames 3402600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 567238/1666666 [1:05:43<1:25:09, 215.19it/s]

finished frames 3403200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 567326/1666666 [1:05:43<1:25:03, 215.39it/s]

finished frames 3403800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 567436/1666666 [1:05:44<1:25:19, 214.70it/s]

finished frames 3404400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 567524/1666666 [1:05:44<1:25:07, 215.20it/s]

finished frames 3405000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 567634/1666666 [1:05:44<1:25:35, 214.01it/s]

finished frames 3405600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 567744/1666666 [1:05:45<1:25:09, 215.09it/s]

finished frames 3406200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 567832/1666666 [1:05:45<1:25:29, 214.23it/s]

finished frames 3406800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 567942/1666666 [1:05:46<1:28:07, 207.78it/s]

finished frames 3407400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 568029/1666666 [1:05:46<1:27:59, 208.09it/s]

finished frames 3408000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 568139/1666666 [1:05:47<1:26:12, 212.36it/s]

finished frames 3408600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 568227/1666666 [1:05:47<1:25:23, 214.39it/s]

finished frames 3409200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 568337/1666666 [1:05:48<1:25:23, 214.35it/s]

finished frames 3409800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 568425/1666666 [1:05:48<1:25:05, 215.11it/s]

finished frames 3410400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 568535/1666666 [1:05:49<1:25:12, 214.80it/s]

finished frames 3411000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 568623/1666666 [1:05:49<1:25:30, 214.04it/s]

finished frames 3411600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 568733/1666666 [1:05:50<1:25:33, 213.89it/s]

finished frames 3412200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 568843/1666666 [1:05:50<1:25:20, 214.38it/s]

finished frames 3412800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 568931/1666666 [1:05:51<1:25:17, 214.51it/s]

finished frames 3413400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 569041/1666666 [1:05:51<1:26:58, 210.32it/s]

finished frames 3414000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 569129/1666666 [1:05:51<1:25:13, 214.63it/s]

finished frames 3414600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 569239/1666666 [1:05:52<1:24:39, 216.05it/s]

finished frames 3415200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 569327/1666666 [1:05:52<1:25:08, 214.80it/s]

finished frames 3415800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 569437/1666666 [1:05:53<1:24:59, 215.15it/s]

finished frames 3416400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 569525/1666666 [1:05:53<1:25:25, 214.04it/s]

finished frames 3417000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 569636/1666666 [1:05:54<1:24:56, 215.26it/s]

finished frames 3417600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 569726/1666666 [1:05:54<1:23:40, 218.49it/s]

finished frames 3418200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 569836/1666666 [1:05:55<1:25:54, 212.78it/s]

finished frames 3418800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 569924/1666666 [1:05:55<1:25:21, 214.14it/s]

finished frames 3419400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 570036/1666666 [1:05:56<1:26:56, 210.23it/s]

finished frames 3420000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 570124/1666666 [1:05:56<1:26:55, 210.24it/s]

finished frames 3420600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 570230/1666666 [1:05:57<1:28:51, 205.67it/s]

finished frames 3421200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 570338/1666666 [1:05:57<1:26:54, 210.24it/s]

finished frames 3421800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 570426/1666666 [1:05:58<1:26:52, 210.30it/s]

finished frames 3422400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 570536/1666666 [1:05:58<1:26:15, 211.80it/s]

finished frames 3423000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 570623/1666666 [1:05:59<1:27:06, 209.72it/s]

finished frames 3423600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 570733/1666666 [1:05:59<1:26:28, 211.22it/s]

finished frames 3424200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 570843/1666666 [1:06:00<1:26:07, 212.04it/s]

finished frames 3424800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 570931/1666666 [1:06:00<1:26:14, 211.74it/s]

finished frames 3425400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 571019/1666666 [1:06:00<1:30:16, 202.29it/s]

finished frames 3426000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 571126/1666666 [1:06:01<1:27:33, 208.52it/s]

finished frames 3426600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 571232/1666666 [1:06:01<1:27:21, 208.97it/s]

finished frames 3427200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 571338/1666666 [1:06:02<1:27:12, 209.32it/s]

finished frames 3427800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 571424/1666666 [1:06:02<1:27:01, 209.75it/s]

finished frames 3428400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 571534/1666666 [1:06:03<1:26:48, 210.25it/s]

finished frames 3429000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 571643/1666666 [1:06:03<1:26:29, 210.99it/s]

finished frames 3429600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 571730/1666666 [1:06:04<1:26:39, 210.59it/s]

finished frames 3430200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 571837/1666666 [1:06:04<1:27:05, 209.51it/s]

finished frames 3430800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 571942/1666666 [1:06:05<1:27:18, 208.97it/s]

finished frames 3431400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 572027/1666666 [1:06:05<1:29:52, 203.00it/s]

finished frames 3432000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 572133/1666666 [1:06:06<1:27:39, 208.10it/s]

finished frames 3432600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 572238/1666666 [1:06:06<1:27:36, 208.21it/s]

finished frames 3433200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 572343/1666666 [1:06:07<1:27:32, 208.36it/s]

finished frames 3433800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 572427/1666666 [1:06:07<1:27:32, 208.31it/s]

finished frames 3434400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 572533/1666666 [1:06:08<1:31:18, 199.71it/s]

finished frames 3435000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 572638/1666666 [1:06:08<1:28:11, 206.74it/s]

finished frames 3435600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 572722/1666666 [1:06:09<1:27:42, 207.88it/s]

finished frames 3436200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 572827/1666666 [1:06:09<1:27:11, 209.08it/s]

finished frames 3436800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 572934/1666666 [1:06:10<1:27:12, 209.01it/s]

finished frames 3437400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 573039/1666666 [1:06:10<1:30:00, 202.51it/s]

finished frames 3438000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 573123/1666666 [1:06:11<1:28:06, 206.85it/s]

finished frames 3438600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 573229/1666666 [1:06:11<1:27:14, 208.88it/s]

finished frames 3439200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 573336/1666666 [1:06:12<1:27:00, 209.45it/s]

finished frames 3439800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 573422/1666666 [1:06:12<1:26:54, 209.66it/s]

finished frames 3440400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 573531/1666666 [1:06:13<1:26:38, 210.27it/s]

finished frames 3441000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 573640/1666666 [1:06:13<1:26:46, 209.95it/s]

finished frames 3441600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 573724/1666666 [1:06:13<1:27:14, 208.79it/s]

finished frames 3442200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 573832/1666666 [1:06:14<1:26:48, 209.83it/s]

finished frames 3442800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 573940/1666666 [1:06:14<1:26:45, 209.91it/s]

finished frames 3443400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 574024/1666666 [1:06:15<1:30:45, 200.65it/s]

finished frames 3444000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 574131/1666666 [1:06:15<1:27:47, 207.41it/s]

finished frames 3444600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 574238/1666666 [1:06:16<1:27:01, 209.23it/s]

finished frames 3445200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 574324/1666666 [1:06:16<1:26:51, 209.59it/s]

finished frames 3445800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 574430/1666666 [1:06:17<1:26:51, 209.58it/s]

finished frames 3446400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 574536/1666666 [1:06:17<1:26:50, 209.60it/s]

finished frames 3447000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 574641/1666666 [1:06:18<1:26:55, 209.38it/s]

finished frames 3447600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 574727/1666666 [1:06:18<1:26:57, 209.28it/s]

finished frames 3448200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 574834/1666666 [1:06:19<1:34:37, 192.31it/s]

finished frames 3448800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 34%|███▍      | 574942/1666666 [1:06:19<1:27:53, 207.01it/s]

finished frames 3449400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 575025/1666666 [1:06:20<1:33:10, 195.28it/s]

finished frames 3450000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 575130/1666666 [1:06:20<1:28:24, 205.79it/s]

finished frames 3450600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 575235/1666666 [1:06:21<1:27:36, 207.65it/s]

finished frames 3451200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 575340/1666666 [1:06:21<1:27:14, 208.50it/s]

finished frames 3451800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 575424/1666666 [1:06:22<1:27:27, 207.96it/s]

finished frames 3452400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 575529/1666666 [1:06:22<1:27:25, 208.02it/s]

finished frames 3453000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 575639/1666666 [1:06:23<1:25:46, 212.00it/s]

finished frames 3453600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 575727/1666666 [1:06:23<1:25:46, 211.98it/s]

finished frames 3454200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 575837/1666666 [1:06:24<1:25:42, 212.13it/s]

finished frames 3454800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 575925/1666666 [1:06:24<1:26:01, 211.34it/s]

finished frames 3455400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 576035/1666666 [1:06:25<1:27:09, 208.56it/s]

finished frames 3456000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 576144/1666666 [1:06:25<1:25:00, 213.82it/s]

finished frames 3456600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 576232/1666666 [1:06:26<1:24:56, 213.96it/s]

finished frames 3457200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 576342/1666666 [1:06:26<1:24:39, 214.64it/s]

finished frames 3457800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 576430/1666666 [1:06:26<1:24:53, 214.06it/s]

finished frames 3458400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 576540/1666666 [1:06:27<1:24:41, 214.52it/s]

finished frames 3459000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 576628/1666666 [1:06:27<1:24:33, 214.83it/s]

finished frames 3459600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 576738/1666666 [1:06:28<1:24:42, 214.44it/s]

finished frames 3460200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 576826/1666666 [1:06:28<1:24:37, 214.63it/s]

finished frames 3460800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 576936/1666666 [1:06:29<1:24:14, 215.58it/s]

finished frames 3461400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 577023/1666666 [1:06:29<1:27:59, 206.39it/s]

finished frames 3462000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 577128/1666666 [1:06:30<1:27:00, 208.72it/s]

finished frames 3462600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 577234/1666666 [1:06:30<1:26:53, 208.96it/s]

finished frames 3463200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 577341/1666666 [1:06:31<1:26:26, 210.02it/s]

finished frames 3463800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 577428/1666666 [1:06:31<1:26:29, 209.88it/s]

finished frames 3464400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 577534/1666666 [1:06:32<1:26:36, 209.61it/s]

finished frames 3465000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 577640/1666666 [1:06:32<1:26:43, 209.29it/s]

finished frames 3465600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 577726/1666666 [1:06:33<1:26:30, 209.81it/s]

finished frames 3466200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 577834/1666666 [1:06:33<1:26:26, 209.96it/s]

finished frames 3466800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 577941/1666666 [1:06:34<1:26:32, 209.66it/s]

finished frames 3467400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 578025/1666666 [1:06:34<1:30:03, 201.47it/s]

finished frames 3468000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 578132/1666666 [1:06:35<1:27:08, 208.21it/s]

finished frames 3468600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 578239/1666666 [1:06:35<1:26:29, 209.73it/s]

finished frames 3469200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 578324/1666666 [1:06:35<1:26:32, 209.58it/s]

finished frames 3469800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 578430/1666666 [1:06:36<1:26:31, 209.61it/s]

finished frames 3470400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 578535/1666666 [1:06:36<1:26:39, 209.27it/s]

finished frames 3471000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 578640/1666666 [1:06:37<1:26:52, 208.74it/s]

finished frames 3471600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 578725/1666666 [1:06:37<1:26:53, 208.66it/s]

finished frames 3472200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 578831/1666666 [1:06:38<1:26:47, 208.91it/s]

finished frames 3472800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 578936/1666666 [1:06:38<1:26:43, 209.04it/s]

finished frames 3473400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 579020/1666666 [1:06:39<1:29:35, 202.33it/s]

finished frames 3474000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 579128/1666666 [1:06:39<1:26:59, 208.36it/s]

finished frames 3474600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 579235/1666666 [1:06:40<1:26:27, 209.63it/s]

finished frames 3475200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 579341/1666666 [1:06:40<1:26:27, 209.60it/s]

finished frames 3475800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 579427/1666666 [1:06:41<1:26:32, 209.38it/s]

finished frames 3476400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 579535/1666666 [1:06:41<1:26:17, 209.96it/s]

finished frames 3477000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 579641/1666666 [1:06:42<1:28:27, 204.79it/s]

finished frames 3477600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 579725/1666666 [1:06:42<1:30:02, 201.21it/s]

finished frames 3478200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 579830/1666666 [1:06:43<1:27:27, 207.10it/s]

finished frames 3478800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 579935/1666666 [1:06:43<1:27:08, 207.86it/s]

finished frames 3479400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 580041/1666666 [1:06:44<1:28:38, 204.31it/s]

finished frames 3480000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 580126/1666666 [1:06:44<1:26:54, 208.35it/s]

finished frames 3480600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 580233/1666666 [1:06:45<1:26:23, 209.58it/s]

finished frames 3481200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 580340/1666666 [1:06:45<1:26:24, 209.53it/s]

finished frames 3481800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 580426/1666666 [1:06:46<1:26:25, 209.47it/s]

finished frames 3482400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 580532/1666666 [1:06:46<1:26:31, 209.21it/s]

finished frames 3483000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 580638/1666666 [1:06:47<1:26:45, 208.61it/s]

finished frames 3483600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 580723/1666666 [1:06:47<1:26:37, 208.93it/s]

finished frames 3484200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 580830/1666666 [1:06:48<1:26:20, 209.59it/s]

finished frames 3484800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 580937/1666666 [1:06:48<1:26:30, 209.16it/s]

finished frames 3485400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 581021/1666666 [1:06:48<1:29:25, 202.32it/s]

finished frames 3486000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 581128/1666666 [1:06:49<1:26:42, 208.67it/s]

finished frames 3486600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 581235/1666666 [1:06:50<1:26:16, 209.67it/s]

finished frames 3487200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 581343/1666666 [1:06:50<1:26:11, 209.85it/s]

finished frames 3487800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 581428/1666666 [1:06:50<1:26:23, 209.37it/s]

finished frames 3488400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 581533/1666666 [1:06:51<1:26:34, 208.89it/s]

finished frames 3489000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 581638/1666666 [1:06:51<1:26:32, 208.95it/s]

finished frames 3489600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 581723/1666666 [1:06:52<1:26:30, 209.04it/s]

finished frames 3490200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 581829/1666666 [1:06:52<1:26:20, 209.40it/s]

finished frames 3490800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 581935/1666666 [1:06:53<1:29:55, 201.04it/s]

finished frames 3491400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 582040/1666666 [1:06:53<1:28:59, 203.14it/s]

finished frames 3492000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 582126/1666666 [1:06:54<1:26:53, 208.01it/s]

finished frames 3492600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 582231/1666666 [1:06:54<1:26:35, 208.74it/s]

finished frames 3493200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 582337/1666666 [1:06:55<1:27:49, 205.78it/s]

finished frames 3493800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 582442/1666666 [1:06:55<1:26:40, 208.48it/s]

finished frames 3494400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 582526/1666666 [1:06:56<1:27:04, 207.50it/s]

finished frames 3495000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 582631/1666666 [1:06:56<1:26:42, 208.35it/s]

finished frames 3495600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 582736/1666666 [1:06:57<1:26:41, 208.38it/s]

finished frames 3496200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 582842/1666666 [1:06:57<1:26:33, 208.69it/s]

finished frames 3496800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 582926/1666666 [1:06:58<1:26:31, 208.76it/s]

finished frames 3497400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 583031/1666666 [1:06:58<1:28:14, 204.68it/s]

finished frames 3498000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 583139/1666666 [1:06:59<1:26:23, 209.05it/s]

finished frames 3498600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 583224/1666666 [1:06:59<1:26:22, 209.07it/s]

finished frames 3499200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▍      | 583330/1666666 [1:07:00<1:26:20, 209.14it/s]

finished frames 3499800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 583437/1666666 [1:07:00<1:26:00, 209.91it/s]

finished frames 3500400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 583524/1666666 [1:07:01<1:25:56, 210.04it/s]

finished frames 3501000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 583633/1666666 [1:07:01<1:26:02, 209.78it/s]

finished frames 3501600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 583741/1666666 [1:07:02<1:25:55, 210.05it/s]

finished frames 3502200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 583827/1666666 [1:07:02<1:26:02, 209.76it/s]

finished frames 3502800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 583935/1666666 [1:07:03<1:26:01, 209.76it/s]

finished frames 3503400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 584040/1666666 [1:07:03<1:28:16, 204.42it/s]

finished frames 3504000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 584127/1666666 [1:07:03<1:26:27, 208.68it/s]

finished frames 3504600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 584232/1666666 [1:07:04<1:35:32, 188.84it/s]

finished frames 3505200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 584338/1666666 [1:07:04<1:28:53, 202.93it/s]

finished frames 3505800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 584422/1666666 [1:07:05<1:28:09, 204.60it/s]

finished frames 3506400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 584527/1666666 [1:07:05<1:26:59, 207.31it/s]

finished frames 3507000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 584632/1666666 [1:07:06<1:26:39, 208.09it/s]

finished frames 3507600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 584737/1666666 [1:07:06<1:26:44, 207.89it/s]

finished frames 3508200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 584842/1666666 [1:07:07<1:26:39, 208.08it/s]

finished frames 3508800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 584926/1666666 [1:07:07<1:26:33, 208.28it/s]

finished frames 3509400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 585031/1666666 [1:07:08<1:28:45, 203.10it/s]

finished frames 3510000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 585136/1666666 [1:07:08<1:27:17, 206.49it/s]

finished frames 3510600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 585241/1666666 [1:07:09<1:26:35, 208.15it/s]

finished frames 3511200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 585326/1666666 [1:07:09<1:26:13, 209.00it/s]

finished frames 3511800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 585431/1666666 [1:07:10<1:26:24, 208.56it/s]

finished frames 3512400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 585536/1666666 [1:07:10<1:26:22, 208.62it/s]

finished frames 3513000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 585641/1666666 [1:07:11<1:26:27, 208.39it/s]

finished frames 3513600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 585726/1666666 [1:07:11<1:26:21, 208.60it/s]

finished frames 3514200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 585831/1666666 [1:07:12<1:26:38, 207.90it/s]

finished frames 3514800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 585937/1666666 [1:07:12<1:26:22, 208.54it/s]

finished frames 3515400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 586021/1666666 [1:07:13<1:28:59, 202.40it/s]

finished frames 3516000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 586126/1666666 [1:07:13<1:26:59, 207.01it/s]

finished frames 3516600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 586231/1666666 [1:07:14<1:26:46, 207.50it/s]

finished frames 3517200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 586336/1666666 [1:07:14<1:26:39, 207.79it/s]

finished frames 3517800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 586441/1666666 [1:07:15<1:26:30, 208.14it/s]

finished frames 3518400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 586526/1666666 [1:07:15<1:26:12, 208.83it/s]

finished frames 3519000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 586631/1666666 [1:07:16<1:28:47, 202.73it/s]

finished frames 3519600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 586737/1666666 [1:07:16<1:31:30, 196.70it/s]

finished frames 3520200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 586842/1666666 [1:07:17<1:27:45, 205.07it/s]

finished frames 3520800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 586927/1666666 [1:07:17<1:26:20, 208.41it/s]

finished frames 3521400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 587032/1666666 [1:07:18<1:28:13, 203.96it/s]

finished frames 3522000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 587137/1666666 [1:07:18<1:26:37, 207.72it/s]

finished frames 3522600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 587242/1666666 [1:07:19<1:26:09, 208.81it/s]

finished frames 3523200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 587327/1666666 [1:07:19<1:26:10, 208.75it/s]

finished frames 3523800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 587432/1666666 [1:07:20<1:26:11, 208.68it/s]

finished frames 3524400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 587539/1666666 [1:07:20<1:25:51, 209.48it/s]

finished frames 3525000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 587624/1666666 [1:07:20<1:25:58, 209.20it/s]

finished frames 3525600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 587730/1666666 [1:07:21<1:25:58, 209.18it/s]

finished frames 3526200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 587837/1666666 [1:07:21<1:25:56, 209.23it/s]

finished frames 3526800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 587943/1666666 [1:07:22<1:26:01, 209.00it/s]

finished frames 3527400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 588027/1666666 [1:07:22<1:28:27, 203.24it/s]

finished frames 3528000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 588133/1666666 [1:07:23<1:26:29, 207.84it/s]

finished frames 3528600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 588238/1666666 [1:07:23<1:26:11, 208.53it/s]

finished frames 3529200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 588324/1666666 [1:07:24<1:25:43, 209.63it/s]

finished frames 3529800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 588432/1666666 [1:07:24<1:24:48, 211.89it/s]

finished frames 3530400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 588542/1666666 [1:07:25<1:23:40, 214.75it/s]

finished frames 3531000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 588630/1666666 [1:07:25<1:23:35, 214.92it/s]

finished frames 3531600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 588740/1666666 [1:07:26<1:23:32, 215.04it/s]

finished frames 3532200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 588828/1666666 [1:07:26<1:23:37, 214.80it/s]

finished frames 3532800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 588938/1666666 [1:07:27<1:27:06, 206.19it/s]

finished frames 3533400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 589025/1666666 [1:07:27<1:26:18, 208.11it/s]

finished frames 3534000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 589134/1666666 [1:07:28<1:23:58, 213.84it/s]

finished frames 3534600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 589244/1666666 [1:07:28<1:23:35, 214.83it/s]

finished frames 3535200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 589332/1666666 [1:07:29<1:23:40, 214.57it/s]

finished frames 3535800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 589442/1666666 [1:07:29<1:23:41, 214.53it/s]

finished frames 3536400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 589530/1666666 [1:07:29<1:23:41, 214.53it/s]

finished frames 3537000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 589640/1666666 [1:07:30<1:23:18, 215.49it/s]

finished frames 3537600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 589728/1666666 [1:07:30<1:22:58, 216.33it/s]

finished frames 3538200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 589838/1666666 [1:07:31<1:22:53, 216.50it/s]

finished frames 3538800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 589926/1666666 [1:07:31<1:22:46, 216.82it/s]

finished frames 3539400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 590036/1666666 [1:07:32<1:24:56, 211.25it/s]

finished frames 3540000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 590124/1666666 [1:07:32<1:23:47, 214.15it/s]

finished frames 3540600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 590234/1666666 [1:07:33<1:23:26, 215.00it/s]

finished frames 3541200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 590344/1666666 [1:07:33<1:23:27, 214.95it/s]

finished frames 3541800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 590432/1666666 [1:07:34<1:23:39, 214.42it/s]

finished frames 3542400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 590542/1666666 [1:07:34<1:23:42, 214.27it/s]

finished frames 3543000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 590630/1666666 [1:07:35<1:23:37, 214.44it/s]

finished frames 3543600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 590740/1666666 [1:07:35<1:23:43, 214.16it/s]

finished frames 3544200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 590828/1666666 [1:07:36<1:23:39, 214.32it/s]

finished frames 3544800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 590938/1666666 [1:07:36<1:23:36, 214.45it/s]

finished frames 3545400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 591026/1666666 [1:07:36<1:25:13, 210.35it/s]

finished frames 3546000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 591136/1666666 [1:07:37<1:23:46, 213.97it/s]

finished frames 3546600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 591224/1666666 [1:07:37<1:23:48, 213.87it/s]

finished frames 3547200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 591334/1666666 [1:07:38<1:23:42, 214.12it/s]

finished frames 3547800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 591444/1666666 [1:07:38<1:23:28, 214.69it/s]

finished frames 3548400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 591532/1666666 [1:07:39<1:23:27, 214.69it/s]

finished frames 3549000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 35%|███▌      | 591642/1666666 [1:07:39<1:23:19, 215.03it/s]

finished frames 3549600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 591730/1666666 [1:07:40<1:23:24, 214.77it/s]

finished frames 3550200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 591840/1666666 [1:07:40<1:23:27, 214.63it/s]

finished frames 3550800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 591928/1666666 [1:07:41<1:23:18, 215.03it/s]

finished frames 3551400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 592038/1666666 [1:07:41<1:25:12, 210.19it/s]

finished frames 3552000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 592126/1666666 [1:07:42<1:23:49, 213.63it/s]

finished frames 3552600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 592236/1666666 [1:07:42<1:23:23, 214.72it/s]

finished frames 3553200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 592324/1666666 [1:07:43<1:23:27, 214.56it/s]

finished frames 3553800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 592434/1666666 [1:07:43<1:23:30, 214.40it/s]

finished frames 3554400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 592544/1666666 [1:07:44<1:23:21, 214.77it/s]

finished frames 3555000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 592632/1666666 [1:07:44<1:23:33, 214.21it/s]

finished frames 3555600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 592742/1666666 [1:07:45<1:23:13, 215.05it/s]

finished frames 3556200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 592830/1666666 [1:07:45<1:23:26, 214.49it/s]

finished frames 3556800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 592940/1666666 [1:07:45<1:23:27, 214.41it/s]

finished frames 3557400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 593028/1666666 [1:07:46<1:25:30, 209.25it/s]

finished frames 3558000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 593138/1666666 [1:07:46<1:24:01, 212.96it/s]

finished frames 3558600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 593226/1666666 [1:07:47<1:24:47, 211.01it/s]

finished frames 3559200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 593336/1666666 [1:07:47<1:23:57, 213.08it/s]

finished frames 3559800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 593424/1666666 [1:07:48<1:23:54, 213.16it/s]

finished frames 3560400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 593534/1666666 [1:07:48<1:23:57, 213.02it/s]

finished frames 3561000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 593644/1666666 [1:07:49<1:23:33, 214.02it/s]

finished frames 3561600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 593732/1666666 [1:07:49<1:28:14, 202.64it/s]

finished frames 3562200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 593841/1666666 [1:07:50<1:24:08, 212.51it/s]

finished frames 3562800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 593929/1666666 [1:07:50<1:23:25, 214.32it/s]

finished frames 3563400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 594038/1666666 [1:07:51<1:26:47, 205.98it/s]

finished frames 3564000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 594122/1666666 [1:07:51<1:27:17, 204.78it/s]

finished frames 3564600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 594227/1666666 [1:07:52<1:27:11, 205.01it/s]

finished frames 3565200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 594332/1666666 [1:07:52<1:26:13, 207.28it/s]

finished frames 3565800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 594437/1666666 [1:07:53<1:26:04, 207.60it/s]

finished frames 3566400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 594542/1666666 [1:07:53<1:26:38, 206.25it/s]

finished frames 3567000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 594629/1666666 [1:07:54<1:25:09, 209.83it/s]

finished frames 3567600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 594740/1666666 [1:07:54<1:23:30, 213.93it/s]

finished frames 3568200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 594830/1666666 [1:07:54<1:23:03, 215.08it/s]

finished frames 3568800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 594940/1666666 [1:07:55<1:24:32, 211.26it/s]

finished frames 3569400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 595028/1666666 [1:07:55<1:24:25, 211.57it/s]

finished frames 3570000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 595139/1666666 [1:07:56<1:22:39, 216.05it/s]

finished frames 3570600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 595228/1666666 [1:07:56<1:25:09, 209.71it/s]

finished frames 3571200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 595334/1666666 [1:07:57<1:26:31, 206.34it/s]

finished frames 3571800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 595443/1666666 [1:07:57<1:23:40, 213.36it/s]

finished frames 3572400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 595531/1666666 [1:07:58<1:23:17, 214.35it/s]

finished frames 3573000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 595641/1666666 [1:07:58<1:24:30, 211.21it/s]

finished frames 3573600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 595729/1666666 [1:07:59<1:24:09, 212.07it/s]

finished frames 3574200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 595839/1666666 [1:07:59<1:23:01, 214.96it/s]

finished frames 3574800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 595927/1666666 [1:08:00<1:22:54, 215.26it/s]

finished frames 3575400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 596037/1666666 [1:08:00<1:24:57, 210.04it/s]

finished frames 3576000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 596125/1666666 [1:08:01<1:26:59, 205.10it/s]

finished frames 3576600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 596234/1666666 [1:08:01<1:23:44, 213.06it/s]

finished frames 3577200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 596344/1666666 [1:08:02<1:23:07, 214.61it/s]

finished frames 3577800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 596432/1666666 [1:08:02<1:23:20, 214.04it/s]

finished frames 3578400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 596542/1666666 [1:08:03<1:23:09, 214.49it/s]

finished frames 3579000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 596630/1666666 [1:08:03<1:23:09, 214.44it/s]

finished frames 3579600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 596740/1666666 [1:08:03<1:22:59, 214.88it/s]

finished frames 3580200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 596828/1666666 [1:08:04<1:22:59, 214.83it/s]

finished frames 3580800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 596938/1666666 [1:08:04<1:22:50, 215.22it/s]

finished frames 3581400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 597026/1666666 [1:08:05<1:24:50, 210.13it/s]

finished frames 3582000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 597136/1666666 [1:08:05<1:23:13, 214.19it/s]

finished frames 3582600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 597224/1666666 [1:08:06<1:23:09, 214.32it/s]

finished frames 3583200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 597334/1666666 [1:08:06<1:24:23, 211.20it/s]

finished frames 3583800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 597422/1666666 [1:08:07<1:24:17, 211.43it/s]

finished frames 3584400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 597532/1666666 [1:08:07<1:23:22, 213.73it/s]

finished frames 3585000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 597642/1666666 [1:08:08<1:23:22, 213.71it/s]

finished frames 3585600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 597730/1666666 [1:08:08<1:23:34, 213.18it/s]

finished frames 3586200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 597840/1666666 [1:08:09<1:23:23, 213.63it/s]

finished frames 3586800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 597928/1666666 [1:08:09<1:23:36, 213.06it/s]

finished frames 3587400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 598038/1666666 [1:08:10<1:25:36, 208.03it/s]

finished frames 3588000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 598125/1666666 [1:08:10<1:24:02, 211.91it/s]

finished frames 3588600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 598235/1666666 [1:08:10<1:23:32, 213.16it/s]

finished frames 3589200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 598323/1666666 [1:08:11<1:23:22, 213.57it/s]

finished frames 3589800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 598433/1666666 [1:08:11<1:23:17, 213.75it/s]

finished frames 3590400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 598543/1666666 [1:08:12<1:26:56, 204.76it/s]

finished frames 3591000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 598629/1666666 [1:08:12<1:27:28, 203.49it/s]

finished frames 3591600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 598739/1666666 [1:08:13<1:23:11, 213.93it/s]

finished frames 3592200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 598827/1666666 [1:08:13<1:22:53, 214.72it/s]

finished frames 3592800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 598937/1666666 [1:08:14<1:22:48, 214.91it/s]

finished frames 3593400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 599025/1666666 [1:08:14<1:24:31, 210.50it/s]

finished frames 3594000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 599135/1666666 [1:08:15<1:24:02, 211.69it/s]

finished frames 3594600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 599223/1666666 [1:08:15<1:24:21, 210.90it/s]

finished frames 3595200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 599333/1666666 [1:08:16<1:24:03, 211.61it/s]

finished frames 3595800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 599443/1666666 [1:08:16<1:24:07, 211.45it/s]

finished frames 3596400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 599531/1666666 [1:08:17<1:23:56, 211.90it/s]

finished frames 3597000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 599641/1666666 [1:08:17<1:23:56, 211.85it/s]

finished frames 3597600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 599729/1666666 [1:08:18<1:24:00, 211.68it/s]

finished frames 3598200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 599839/1666666 [1:08:18<1:23:50, 212.07it/s]

finished frames 3598800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 599927/1666666 [1:08:19<1:23:49, 212.11it/s]

finished frames 3599400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 600036/1666666 [1:08:19<1:26:22, 205.80it/s]

finished frames 3600000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 600141/1666666 [1:08:20<1:25:33, 207.77it/s]

finished frames 3600600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 600225/1666666 [1:08:20<1:25:32, 207.76it/s]

finished frames 3601200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 600330/1666666 [1:08:21<1:25:40, 207.45it/s]

finished frames 3601800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 600435/1666666 [1:08:21<1:25:37, 207.53it/s]

finished frames 3602400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 600540/1666666 [1:08:22<1:25:39, 207.43it/s]

finished frames 3603000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 600624/1666666 [1:08:22<1:25:39, 207.43it/s]

finished frames 3603600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 600729/1666666 [1:08:22<1:25:30, 207.75it/s]

finished frames 3604200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 600833/1666666 [1:08:23<1:32:00, 193.08it/s]

finished frames 3604800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 600917/1666666 [1:08:23<1:26:51, 204.49it/s]

finished frames 3605400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 601022/1666666 [1:08:24<1:29:12, 199.09it/s]

finished frames 3606000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 601130/1666666 [1:08:24<1:24:36, 209.91it/s]

finished frames 3606600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 601240/1666666 [1:08:25<1:23:23, 212.95it/s]

finished frames 3607200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 601328/1666666 [1:08:25<1:23:17, 213.19it/s]

finished frames 3607800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 601438/1666666 [1:08:26<1:23:16, 213.21it/s]

finished frames 3608400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 601526/1666666 [1:08:26<1:23:16, 213.17it/s]

finished frames 3609000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 601636/1666666 [1:08:27<1:23:14, 213.24it/s]

finished frames 3609600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 601724/1666666 [1:08:27<1:23:15, 213.17it/s]

finished frames 3610200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 601834/1666666 [1:08:28<1:23:01, 213.77it/s]

finished frames 3610800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 601944/1666666 [1:08:28<1:22:46, 214.37it/s]

finished frames 3611400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 602031/1666666 [1:08:29<1:25:52, 206.62it/s]

finished frames 3612000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 602136/1666666 [1:08:29<1:25:23, 207.79it/s]

finished frames 3612600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 602241/1666666 [1:08:30<1:25:23, 207.76it/s]

finished frames 3613200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 602326/1666666 [1:08:30<1:25:12, 208.17it/s]

finished frames 3613800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 602434/1666666 [1:08:31<1:24:44, 209.29it/s]

finished frames 3614400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 602540/1666666 [1:08:31<1:24:47, 209.18it/s]

finished frames 3615000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 602626/1666666 [1:08:32<1:24:42, 209.34it/s]

finished frames 3615600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 602732/1666666 [1:08:32<1:24:52, 208.93it/s]

finished frames 3616200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 602838/1666666 [1:08:33<1:24:43, 209.28it/s]

finished frames 3616800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 602922/1666666 [1:08:33<1:25:08, 208.25it/s]

finished frames 3617400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 603027/1666666 [1:08:33<1:27:18, 203.04it/s]

finished frames 3618000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 603132/1666666 [1:08:34<1:25:58, 206.17it/s]

finished frames 3618600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 603238/1666666 [1:08:35<1:30:21, 196.16it/s]

finished frames 3619200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 603322/1666666 [1:08:35<1:30:05, 196.70it/s]

finished frames 3619800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 603429/1666666 [1:08:36<1:25:36, 207.00it/s]

finished frames 3620400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 603537/1666666 [1:08:36<1:24:36, 209.43it/s]

finished frames 3621000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 603623/1666666 [1:08:36<1:24:37, 209.36it/s]

finished frames 3621600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 603729/1666666 [1:08:37<1:24:40, 209.22it/s]

finished frames 3622200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 603835/1666666 [1:08:37<1:24:45, 208.97it/s]

finished frames 3622800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 603940/1666666 [1:08:38<1:24:57, 208.48it/s]

finished frames 3623400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 604024/1666666 [1:08:38<1:26:50, 203.95it/s]

finished frames 3624000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▌      | 604130/1666666 [1:08:39<1:25:13, 207.79it/s]

finished frames 3624600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▋      | 604235/1666666 [1:08:39<1:25:00, 208.30it/s]

finished frames 3625200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▋      | 604341/1666666 [1:08:40<1:24:48, 208.78it/s]

finished frames 3625800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▋      | 604425/1666666 [1:08:40<1:24:56, 208.44it/s]

finished frames 3626400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▋      | 604531/1666666 [1:08:41<1:24:48, 208.72it/s]

finished frames 3627000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▋      | 604638/1666666 [1:08:41<1:24:41, 208.99it/s]

finished frames 3627600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▋      | 604722/1666666 [1:08:42<1:25:16, 207.53it/s]

finished frames 3628200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▋      | 604827/1666666 [1:08:42<1:24:57, 208.33it/s]

finished frames 3628800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▋      | 604932/1666666 [1:08:43<1:25:01, 208.10it/s]

finished frames 3629400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▋      | 605037/1666666 [1:08:43<1:26:56, 203.50it/s]

finished frames 3630000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▋      | 605143/1666666 [1:08:44<1:25:04, 207.96it/s]

finished frames 3630600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▋      | 605227/1666666 [1:08:44<1:24:58, 208.18it/s]

finished frames 3631200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▋      | 605332/1666666 [1:08:45<1:24:58, 208.17it/s]

finished frames 3631800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▋      | 605438/1666666 [1:08:45<1:24:51, 208.43it/s]

finished frames 3632400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▋      | 605522/1666666 [1:08:46<1:25:12, 207.55it/s]

finished frames 3633000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▋      | 605628/1666666 [1:08:46<1:24:53, 208.30it/s]

finished frames 3633600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▋      | 605734/1666666 [1:08:47<1:25:11, 207.58it/s]

finished frames 3634200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▋      | 605839/1666666 [1:08:47<1:24:53, 208.25it/s]

finished frames 3634800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▋      | 605926/1666666 [1:08:48<1:23:56, 210.61it/s]

finished frames 3635400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▋      | 606035/1666666 [1:08:48<1:25:36, 206.50it/s]

finished frames 3636000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▋      | 606144/1666666 [1:08:49<1:23:38, 211.32it/s]

finished frames 3636600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▋      | 606232/1666666 [1:08:49<1:23:02, 212.83it/s]

finished frames 3637200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▋      | 606342/1666666 [1:08:50<1:22:32, 214.11it/s]

finished frames 3637800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▋      | 606430/1666666 [1:08:50<1:22:36, 213.93it/s]

finished frames 3638400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▋      | 606540/1666666 [1:08:50<1:22:26, 214.31it/s]

finished frames 3639000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▋      | 606628/1666666 [1:08:51<1:22:35, 213.92it/s]

finished frames 3639600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▋      | 606738/1666666 [1:08:51<1:23:03, 212.67it/s]

finished frames 3640200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▋      | 606826/1666666 [1:08:52<1:23:35, 211.32it/s]

finished frames 3640800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▋      | 606936/1666666 [1:08:52<1:23:29, 211.53it/s]

finished frames 3641400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▋      | 607023/1666666 [1:08:53<1:25:32, 206.46it/s]

finished frames 3642000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▋      | 607129/1666666 [1:08:53<1:26:51, 203.32it/s]

finished frames 3642600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▋      | 607234/1666666 [1:08:54<1:25:07, 207.44it/s]

finished frames 3643200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▋      | 607341/1666666 [1:08:54<1:24:25, 209.14it/s]

finished frames 3643800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▋      | 607425/1666666 [1:08:55<1:24:36, 208.65it/s]

finished frames 3644400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▋      | 607532/1666666 [1:08:55<1:24:34, 208.71it/s]

finished frames 3645000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▋      | 607638/1666666 [1:08:56<1:26:12, 204.74it/s]

finished frames 3645600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▋      | 607725/1666666 [1:08:56<1:23:51, 210.47it/s]

finished frames 3646200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▋      | 607835/1666666 [1:08:57<1:22:54, 212.84it/s]

finished frames 3646800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▋      | 607923/1666666 [1:08:57<1:25:22, 206.69it/s]

finished frames 3647400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▋      | 608031/1666666 [1:08:58<1:28:03, 200.38it/s]

finished frames 3648000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▋      | 608140/1666666 [1:08:58<1:23:44, 210.69it/s]

finished frames 3648600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 36%|███▋      | 608228/1666666 [1:08:59<1:22:59, 212.55it/s]

finished frames 3649200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 608338/1666666 [1:08:59<1:22:33, 213.67it/s]

finished frames 3649800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 608426/1666666 [1:08:59<1:22:25, 214.00it/s]

finished frames 3650400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 608536/1666666 [1:09:00<1:22:31, 213.69it/s]

finished frames 3651000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 608624/1666666 [1:09:00<1:22:24, 214.00it/s]

finished frames 3651600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 608734/1666666 [1:09:01<1:21:49, 215.48it/s]

finished frames 3652200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 608844/1666666 [1:09:01<1:21:39, 215.90it/s]

finished frames 3652800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 608932/1666666 [1:09:02<1:21:40, 215.83it/s]

finished frames 3653400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 609042/1666666 [1:09:02<1:23:34, 210.91it/s]

finished frames 3654000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 609130/1666666 [1:09:03<1:22:17, 214.20it/s]

finished frames 3654600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 609240/1666666 [1:09:03<1:21:48, 215.42it/s]

finished frames 3655200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 609328/1666666 [1:09:04<1:21:38, 215.84it/s]

finished frames 3655800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 609438/1666666 [1:09:04<1:21:28, 216.28it/s]

finished frames 3656400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 609526/1666666 [1:09:05<1:21:28, 216.25it/s]

finished frames 3657000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 609636/1666666 [1:09:05<1:21:31, 216.10it/s]

finished frames 3657600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 609724/1666666 [1:09:06<1:21:28, 216.21it/s]

finished frames 3658200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 609834/1666666 [1:09:06<1:21:23, 216.40it/s]

finished frames 3658800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 609944/1666666 [1:09:07<1:21:28, 216.18it/s]

finished frames 3659400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 610032/1666666 [1:09:07<1:22:39, 213.06it/s]

finished frames 3660000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 610142/1666666 [1:09:07<1:21:31, 216.01it/s]

finished frames 3660600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 610230/1666666 [1:09:08<1:21:25, 216.23it/s]

finished frames 3661200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 610340/1666666 [1:09:08<1:23:00, 212.10it/s]

finished frames 3661800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 610428/1666666 [1:09:09<1:25:12, 206.58it/s]

finished frames 3662400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 610538/1666666 [1:09:09<1:22:48, 212.56it/s]

finished frames 3663000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 610626/1666666 [1:09:10<1:22:59, 212.06it/s]

finished frames 3663600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 610736/1666666 [1:09:10<1:23:07, 211.71it/s]

finished frames 3664200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 610824/1666666 [1:09:11<1:23:03, 211.87it/s]

finished frames 3664800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 610934/1666666 [1:09:11<1:23:07, 211.66it/s]

finished frames 3665400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 611022/1666666 [1:09:12<1:25:33, 205.63it/s]

finished frames 3666000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 611131/1666666 [1:09:12<1:23:38, 210.35it/s]

finished frames 3666600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 611241/1666666 [1:09:13<1:23:00, 211.93it/s]

finished frames 3667200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 611329/1666666 [1:09:13<1:22:54, 212.15it/s]

finished frames 3667800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 611439/1666666 [1:09:14<1:22:56, 212.05it/s]

finished frames 3668400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 611527/1666666 [1:09:14<1:22:53, 212.17it/s]

finished frames 3669000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 611637/1666666 [1:09:15<1:23:03, 211.69it/s]

finished frames 3669600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 611725/1666666 [1:09:15<1:23:00, 211.82it/s]

finished frames 3670200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 611835/1666666 [1:09:16<1:25:14, 206.23it/s]

finished frames 3670800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 611940/1666666 [1:09:16<1:25:23, 205.87it/s]

finished frames 3671400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 612025/1666666 [1:09:16<1:26:31, 203.16it/s]

finished frames 3672000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 612133/1666666 [1:09:17<1:23:44, 209.86it/s]

finished frames 3672600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 612242/1666666 [1:09:18<1:23:05, 211.52it/s]

finished frames 3673200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 612330/1666666 [1:09:18<1:23:09, 211.31it/s]

finished frames 3673800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 612440/1666666 [1:09:18<1:23:17, 210.97it/s]

finished frames 3674400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 612528/1666666 [1:09:19<1:23:20, 210.82it/s]

finished frames 3675000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 612636/1666666 [1:09:19<1:28:55, 197.56it/s]

finished frames 3675600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 612723/1666666 [1:09:20<1:24:43, 207.34it/s]

finished frames 3676200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 612831/1666666 [1:09:20<1:28:36, 198.22it/s]

finished frames 3676800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 612939/1666666 [1:09:21<1:23:57, 209.19it/s]

finished frames 3677400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 613024/1666666 [1:09:21<1:25:05, 206.36it/s]

finished frames 3678000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 613130/1666666 [1:09:22<1:24:31, 207.72it/s]

finished frames 3678600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 613235/1666666 [1:09:22<1:26:08, 203.80it/s]

finished frames 3679200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 613340/1666666 [1:09:23<1:25:09, 206.14it/s]

finished frames 3679800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 613424/1666666 [1:09:23<1:24:55, 206.71it/s]

finished frames 3680400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 613529/1666666 [1:09:24<1:24:50, 206.89it/s]

finished frames 3681000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 613634/1666666 [1:09:24<1:24:56, 206.61it/s]

finished frames 3681600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 613739/1666666 [1:09:25<1:24:46, 206.99it/s]

finished frames 3682200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 613824/1666666 [1:09:25<1:24:12, 208.38it/s]

finished frames 3682800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 613929/1666666 [1:09:26<1:24:12, 208.35it/s]

finished frames 3683400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 614034/1666666 [1:09:26<1:26:10, 203.60it/s]

finished frames 3684000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 614140/1666666 [1:09:27<1:24:18, 208.08it/s]

finished frames 3684600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 614226/1666666 [1:09:27<1:23:55, 209.01it/s]

finished frames 3685200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 614332/1666666 [1:09:28<1:24:50, 206.71it/s]

finished frames 3685800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 614437/1666666 [1:09:28<1:25:37, 204.82it/s]

finished frames 3686400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 614542/1666666 [1:09:29<1:25:00, 206.26it/s]

finished frames 3687000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 614626/1666666 [1:09:29<1:24:29, 207.51it/s]

finished frames 3687600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 614731/1666666 [1:09:30<1:24:32, 207.39it/s]

finished frames 3688200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 614836/1666666 [1:09:30<1:24:25, 207.66it/s]

finished frames 3688800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 614941/1666666 [1:09:31<1:24:27, 207.54it/s]

finished frames 3689400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 615025/1666666 [1:09:31<1:26:19, 203.04it/s]

finished frames 3690000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 615130/1666666 [1:09:32<1:24:44, 206.82it/s]

finished frames 3690600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 615235/1666666 [1:09:32<1:24:10, 208.17it/s]

finished frames 3691200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 615340/1666666 [1:09:33<1:24:11, 208.11it/s]

finished frames 3691800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 615427/1666666 [1:09:33<1:22:42, 211.84it/s]

finished frames 3692400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 615537/1666666 [1:09:33<1:22:17, 212.88it/s]

finished frames 3693000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 615624/1666666 [1:09:34<1:23:37, 209.48it/s]

finished frames 3693600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 615729/1666666 [1:09:34<1:24:04, 208.34it/s]

finished frames 3694200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 615837/1666666 [1:09:35<1:22:34, 212.09it/s]

finished frames 3694800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 615925/1666666 [1:09:35<1:22:08, 213.21it/s]

finished frames 3695400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 616035/1666666 [1:09:36<1:24:20, 207.63it/s]

finished frames 3696000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 616144/1666666 [1:09:36<1:22:41, 211.75it/s]

finished frames 3696600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 616232/1666666 [1:09:37<1:22:26, 212.34it/s]

finished frames 3697200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 616342/1666666 [1:09:37<1:22:23, 212.46it/s]

finished frames 3697800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 616430/1666666 [1:09:38<1:22:11, 212.97it/s]

finished frames 3698400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 616540/1666666 [1:09:38<1:22:34, 211.96it/s]

finished frames 3699000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 616628/1666666 [1:09:39<1:22:33, 211.98it/s]

finished frames 3699600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 616738/1666666 [1:09:39<1:22:27, 212.23it/s]

finished frames 3700200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 616826/1666666 [1:09:40<1:22:30, 212.05it/s]

finished frames 3700800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 616933/1666666 [1:09:40<1:23:44, 208.92it/s]

finished frames 3701400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 617039/1666666 [1:09:41<1:24:51, 206.16it/s]

finished frames 3702000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 617126/1666666 [1:09:41<1:23:04, 210.58it/s]

finished frames 3702600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 617236/1666666 [1:09:42<1:22:33, 211.84it/s]

finished frames 3703200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 617323/1666666 [1:09:42<1:33:34, 186.90it/s]

finished frames 3703800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 617429/1666666 [1:09:43<1:25:33, 204.40it/s]

finished frames 3704400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 617537/1666666 [1:09:43<1:23:31, 209.35it/s]

finished frames 3705000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 617623/1666666 [1:09:43<1:23:30, 209.35it/s]

finished frames 3705600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 617732/1666666 [1:09:44<1:23:16, 209.91it/s]

finished frames 3706200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 617840/1666666 [1:09:44<1:23:02, 210.49it/s]

finished frames 3706800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 617927/1666666 [1:09:45<1:23:22, 209.63it/s]

finished frames 3707400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 618033/1666666 [1:09:45<1:25:16, 204.93it/s]

finished frames 3708000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 618141/1666666 [1:09:46<1:23:33, 209.12it/s]

finished frames 3708600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 618227/1666666 [1:09:46<1:23:38, 208.92it/s]

finished frames 3709200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 618332/1666666 [1:09:47<1:25:49, 203.59it/s]

finished frames 3709800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 618442/1666666 [1:09:47<1:22:13, 212.48it/s]

finished frames 3710400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 618530/1666666 [1:09:48<1:21:41, 213.86it/s]

finished frames 3711000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 618640/1666666 [1:09:48<1:21:38, 213.96it/s]

finished frames 3711600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 618728/1666666 [1:09:49<1:22:17, 212.25it/s]

finished frames 3712200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 618838/1666666 [1:09:49<1:22:31, 211.61it/s]

finished frames 3712800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 618926/1666666 [1:09:50<1:22:42, 211.14it/s]

finished frames 3713400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 619036/1666666 [1:09:50<1:24:02, 207.75it/s]

finished frames 3714000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 619123/1666666 [1:09:51<1:23:00, 210.32it/s]

finished frames 3714600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 619233/1666666 [1:09:51<1:22:25, 211.79it/s]

finished frames 3715200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 619340/1666666 [1:09:52<1:23:50, 208.20it/s]

finished frames 3715800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 619425/1666666 [1:09:52<1:23:52, 208.10it/s]

finished frames 3716400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 619530/1666666 [1:09:53<1:24:01, 207.69it/s]

finished frames 3717000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 619635/1666666 [1:09:53<1:27:55, 198.48it/s]

finished frames 3717600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 619723/1666666 [1:09:54<1:22:52, 210.55it/s]

finished frames 3718200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 619833/1666666 [1:09:54<1:22:55, 210.41it/s]

finished frames 3718800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 619942/1666666 [1:09:55<1:23:43, 208.36it/s]

finished frames 3719400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 620026/1666666 [1:09:55<1:26:54, 200.70it/s]

finished frames 3720000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 620135/1666666 [1:09:55<1:22:37, 211.12it/s]

finished frames 3720600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 620223/1666666 [1:09:56<1:23:02, 210.02it/s]

finished frames 3721200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 620332/1666666 [1:09:56<1:24:03, 207.44it/s]

finished frames 3721800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 620437/1666666 [1:09:57<1:26:30, 201.55it/s]

finished frames 3722400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 620542/1666666 [1:09:57<1:24:04, 207.37it/s]

finished frames 3723000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 620627/1666666 [1:09:58<1:23:48, 208.03it/s]

finished frames 3723600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 620733/1666666 [1:09:58<1:23:22, 209.08it/s]

finished frames 3724200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 620839/1666666 [1:09:59<1:24:02, 207.41it/s]

finished frames 3724800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 620923/1666666 [1:09:59<1:23:42, 208.22it/s]

finished frames 3725400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 621028/1666666 [1:10:00<1:25:17, 204.31it/s]

finished frames 3726000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 621134/1666666 [1:10:00<1:23:39, 208.31it/s]

finished frames 3726600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 621239/1666666 [1:10:01<1:23:31, 208.59it/s]

finished frames 3727200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 621323/1666666 [1:10:01<1:23:32, 208.55it/s]

finished frames 3727800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 621429/1666666 [1:10:02<1:23:18, 209.09it/s]

finished frames 3728400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 621537/1666666 [1:10:02<1:21:30, 213.69it/s]

finished frames 3729000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 621625/1666666 [1:10:03<1:20:56, 215.18it/s]

finished frames 3729600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 621735/1666666 [1:10:03<1:20:56, 215.14it/s]

finished frames 3730200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 621823/1666666 [1:10:04<1:20:52, 215.31it/s]

finished frames 3730800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 621933/1666666 [1:10:04<1:25:37, 203.35it/s]

finished frames 3731400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 622021/1666666 [1:10:05<1:24:30, 206.01it/s]

finished frames 3732000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 622129/1666666 [1:10:05<1:23:42, 207.96it/s]

finished frames 3732600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 622239/1666666 [1:10:06<1:21:55, 212.48it/s]

finished frames 3733200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 622327/1666666 [1:10:06<1:21:40, 213.11it/s]

finished frames 3733800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 622437/1666666 [1:10:07<1:21:33, 213.40it/s]

finished frames 3734400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 622525/1666666 [1:10:07<1:21:25, 213.72it/s]

finished frames 3735000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 622635/1666666 [1:10:07<1:21:20, 213.91it/s]

finished frames 3735600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 622723/1666666 [1:10:08<1:21:26, 213.65it/s]

finished frames 3736200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 622833/1666666 [1:10:08<1:21:29, 213.47it/s]

finished frames 3736800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 622943/1666666 [1:10:09<1:21:24, 213.68it/s]

finished frames 3737400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 623031/1666666 [1:10:09<1:24:01, 207.00it/s]

finished frames 3738000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 623140/1666666 [1:10:10<1:21:52, 212.41it/s]

finished frames 3738600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 623228/1666666 [1:10:10<1:21:32, 213.29it/s]

finished frames 3739200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 623338/1666666 [1:10:11<1:21:16, 213.93it/s]

finished frames 3739800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 623426/1666666 [1:10:11<1:21:07, 214.33it/s]

finished frames 3740400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 623536/1666666 [1:10:12<1:21:10, 214.19it/s]

finished frames 3741000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 623624/1666666 [1:10:12<1:21:27, 213.40it/s]

finished frames 3741600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 623734/1666666 [1:10:13<1:21:17, 213.82it/s]

finished frames 3742200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 623844/1666666 [1:10:13<1:21:10, 214.13it/s]

finished frames 3742800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 623932/1666666 [1:10:14<1:21:07, 214.22it/s]

finished frames 3743400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 624020/1666666 [1:10:14<1:24:56, 204.58it/s]

finished frames 3744000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 624130/1666666 [1:10:14<1:21:30, 213.18it/s]

finished frames 3744600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 624240/1666666 [1:10:15<1:21:14, 213.83it/s]

finished frames 3745200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 624328/1666666 [1:10:15<1:21:36, 212.89it/s]

finished frames 3745800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 624416/1666666 [1:10:16<1:21:17, 213.69it/s]

finished frames 3746400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 624525/1666666 [1:10:16<1:22:53, 209.55it/s]

finished frames 3747000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 624635/1666666 [1:10:17<1:21:37, 212.77it/s]

finished frames 3747600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 624723/1666666 [1:10:17<1:21:13, 213.79it/s]

finished frames 3748200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 624833/1666666 [1:10:18<1:20:50, 214.81it/s]

finished frames 3748800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 37%|███▋      | 624943/1666666 [1:10:18<1:20:55, 214.54it/s]

finished frames 3749400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 625031/1666666 [1:10:19<1:22:58, 209.22it/s]

finished frames 3750000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 625140/1666666 [1:10:19<1:22:20, 210.83it/s]

finished frames 3750600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 625227/1666666 [1:10:20<1:22:18, 210.90it/s]

finished frames 3751200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 625337/1666666 [1:10:20<1:22:09, 211.24it/s]

finished frames 3751800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 625425/1666666 [1:10:21<1:22:14, 211.03it/s]

finished frames 3752400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 625533/1666666 [1:10:21<1:22:55, 209.26it/s]

finished frames 3753000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 625639/1666666 [1:10:22<1:23:02, 208.92it/s]

finished frames 3753600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 625723/1666666 [1:10:22<1:23:33, 207.65it/s]

finished frames 3754200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 625829/1666666 [1:10:23<1:23:13, 208.44it/s]

finished frames 3754800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 625934/1666666 [1:10:23<1:23:11, 208.48it/s]

finished frames 3755400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 626021/1666666 [1:10:24<1:24:28, 205.30it/s]

finished frames 3756000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 626130/1666666 [1:10:24<1:21:58, 211.55it/s]

finished frames 3756600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 626240/1666666 [1:10:25<1:21:25, 212.98it/s]

finished frames 3757200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 626328/1666666 [1:10:25<1:21:18, 213.25it/s]

finished frames 3757800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 626438/1666666 [1:10:25<1:21:34, 212.53it/s]

finished frames 3758400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 626526/1666666 [1:10:26<1:21:26, 212.85it/s]

finished frames 3759000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 626636/1666666 [1:10:26<1:21:42, 212.16it/s]

finished frames 3759600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 626723/1666666 [1:10:27<1:26:23, 200.63it/s]

finished frames 3760200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 626831/1666666 [1:10:27<1:33:43, 184.89it/s]

finished frames 3760800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 626940/1666666 [1:10:28<1:22:56, 208.92it/s]

finished frames 3761400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 627028/1666666 [1:10:28<1:23:05, 208.53it/s]

finished frames 3762000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 627138/1666666 [1:10:29<1:21:13, 213.30it/s]

finished frames 3762600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 627226/1666666 [1:10:29<1:20:52, 214.19it/s]

finished frames 3763200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 627336/1666666 [1:10:30<1:20:43, 214.59it/s]

finished frames 3763800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 627424/1666666 [1:10:30<1:20:54, 214.08it/s]

finished frames 3764400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 627534/1666666 [1:10:31<1:20:42, 214.57it/s]

finished frames 3765000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 627644/1666666 [1:10:31<1:20:47, 214.33it/s]

finished frames 3765600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 627732/1666666 [1:10:32<1:20:50, 214.19it/s]

finished frames 3766200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 627842/1666666 [1:10:32<1:20:34, 214.87it/s]

finished frames 3766800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 627930/1666666 [1:10:33<1:20:55, 213.94it/s]

finished frames 3767400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 628040/1666666 [1:10:33<1:22:19, 210.26it/s]

finished frames 3768000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 628128/1666666 [1:10:34<1:21:06, 213.40it/s]

finished frames 3768600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 628238/1666666 [1:10:34<1:21:00, 213.64it/s]

finished frames 3769200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 628326/1666666 [1:10:34<1:21:39, 211.92it/s]

finished frames 3769800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 628436/1666666 [1:10:35<1:21:52, 211.34it/s]

finished frames 3770400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 628524/1666666 [1:10:35<1:22:08, 210.64it/s]

finished frames 3771000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 628633/1666666 [1:10:36<1:22:33, 209.56it/s]

finished frames 3771600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 628739/1666666 [1:10:36<1:22:57, 208.54it/s]

finished frames 3772200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 628823/1666666 [1:10:37<1:23:25, 207.36it/s]

finished frames 3772800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 628928/1666666 [1:10:37<1:23:11, 207.92it/s]

finished frames 3773400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 629033/1666666 [1:10:38<1:24:59, 203.50it/s]

finished frames 3774000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 629140/1666666 [1:10:38<1:24:36, 204.36it/s]

finished frames 3774600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 629226/1666666 [1:10:39<1:25:28, 202.28it/s]

finished frames 3775200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 629335/1666666 [1:10:39<1:21:47, 211.40it/s]

finished frames 3775800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 629423/1666666 [1:10:40<1:21:10, 212.97it/s]

finished frames 3776400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 629533/1666666 [1:10:40<1:21:12, 212.86it/s]

finished frames 3777000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 629643/1666666 [1:10:41<1:21:06, 213.11it/s]

finished frames 3777600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 629731/1666666 [1:10:41<1:21:01, 213.28it/s]

finished frames 3778200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 629841/1666666 [1:10:42<1:21:05, 213.10it/s]

finished frames 3778800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 629929/1666666 [1:10:42<1:21:02, 213.19it/s]

finished frames 3779400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 630038/1666666 [1:10:43<1:24:45, 203.85it/s]

finished frames 3780000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 630125/1666666 [1:10:43<1:21:09, 212.88it/s]

finished frames 3780600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 630235/1666666 [1:10:44<1:20:02, 215.83it/s]

finished frames 3781200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 630323/1666666 [1:10:44<1:19:42, 216.69it/s]

finished frames 3781800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 630433/1666666 [1:10:44<1:19:31, 217.16it/s]

finished frames 3782400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 630543/1666666 [1:10:45<1:19:43, 216.61it/s]

finished frames 3783000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 630631/1666666 [1:10:45<1:19:49, 216.32it/s]

finished frames 3783600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 630741/1666666 [1:10:46<1:20:24, 214.71it/s]

finished frames 3784200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 630829/1666666 [1:10:46<1:20:33, 214.31it/s]

finished frames 3784800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 630939/1666666 [1:10:47<1:20:54, 213.34it/s]

finished frames 3785400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 631027/1666666 [1:10:47<1:22:20, 209.63it/s]

finished frames 3786000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 631137/1666666 [1:10:48<1:20:53, 213.35it/s]

finished frames 3786600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 631225/1666666 [1:10:48<1:20:34, 214.17it/s]

finished frames 3787200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 631335/1666666 [1:10:49<1:20:42, 213.80it/s]

finished frames 3787800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 631423/1666666 [1:10:49<1:20:34, 214.15it/s]

finished frames 3788400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 631533/1666666 [1:10:50<1:20:15, 214.97it/s]

finished frames 3789000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 631643/1666666 [1:10:50<1:24:42, 203.63it/s]

finished frames 3789600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 631731/1666666 [1:10:51<1:20:54, 213.21it/s]

finished frames 3790200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 631841/1666666 [1:10:51<1:19:49, 216.08it/s]

finished frames 3790800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 631929/1666666 [1:10:52<1:19:50, 216.02it/s]

finished frames 3791400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 632038/1666666 [1:10:52<1:22:41, 208.53it/s]

finished frames 3792000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 632125/1666666 [1:10:52<1:21:38, 211.20it/s]

finished frames 3792600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 632234/1666666 [1:10:53<1:22:19, 209.40it/s]

finished frames 3793200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 632339/1666666 [1:10:53<1:22:47, 208.20it/s]

finished frames 3793800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 632423/1666666 [1:10:54<1:22:46, 208.24it/s]

finished frames 3794400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 632531/1666666 [1:10:54<1:21:35, 211.26it/s]

finished frames 3795000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 632641/1666666 [1:10:55<1:21:27, 211.57it/s]

finished frames 3795600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 632729/1666666 [1:10:55<1:21:03, 212.58it/s]

finished frames 3796200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 632839/1666666 [1:10:56<1:20:44, 213.40it/s]

finished frames 3796800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 632927/1666666 [1:10:56<1:20:35, 213.79it/s]

finished frames 3797400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 633037/1666666 [1:10:57<1:22:19, 209.25it/s]

finished frames 3798000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 633124/1666666 [1:10:57<1:21:13, 212.08it/s]

finished frames 3798600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 633234/1666666 [1:10:58<1:20:53, 212.91it/s]

finished frames 3799200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 633344/1666666 [1:10:58<1:20:22, 214.29it/s]

finished frames 3799800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 633432/1666666 [1:10:59<1:20:43, 213.34it/s]

finished frames 3800400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 633542/1666666 [1:10:59<1:20:29, 213.91it/s]

finished frames 3801000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 633630/1666666 [1:11:00<1:20:35, 213.63it/s]

finished frames 3801600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 633740/1666666 [1:11:00<1:20:26, 214.00it/s]

finished frames 3802200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 633828/1666666 [1:11:01<1:20:22, 214.19it/s]

finished frames 3802800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 633938/1666666 [1:11:01<1:20:18, 214.33it/s]

finished frames 3803400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 634024/1666666 [1:11:01<1:25:34, 201.13it/s]

finished frames 3804000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 634134/1666666 [1:11:02<1:21:09, 212.02it/s]

finished frames 3804600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 634222/1666666 [1:11:02<1:21:02, 212.34it/s]

finished frames 3805200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 634332/1666666 [1:11:03<1:20:54, 212.65it/s]

finished frames 3805800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 634442/1666666 [1:11:03<1:21:22, 211.42it/s]

finished frames 3806400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 634530/1666666 [1:11:04<1:21:12, 211.81it/s]

finished frames 3807000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 634640/1666666 [1:11:04<1:21:08, 211.99it/s]

finished frames 3807600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 634728/1666666 [1:11:05<1:21:05, 212.11it/s]

finished frames 3808200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 634838/1666666 [1:11:05<1:20:59, 212.32it/s]

finished frames 3808800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 634926/1666666 [1:11:06<1:21:02, 212.17it/s]

finished frames 3809400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 635036/1666666 [1:11:06<1:22:45, 207.75it/s]

finished frames 3810000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 635123/1666666 [1:11:07<1:21:33, 210.78it/s]

finished frames 3810600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 635233/1666666 [1:11:07<1:21:10, 211.78it/s]

finished frames 3811200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 635343/1666666 [1:11:08<1:21:02, 212.11it/s]

finished frames 3811800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 635431/1666666 [1:11:08<1:21:14, 211.58it/s]

finished frames 3812400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 635541/1666666 [1:11:09<1:20:51, 212.55it/s]

finished frames 3813000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 635629/1666666 [1:11:09<1:20:50, 212.54it/s]

finished frames 3813600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 635739/1666666 [1:11:10<1:20:55, 212.32it/s]

finished frames 3814200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 635827/1666666 [1:11:10<1:21:05, 211.88it/s]

finished frames 3814800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 635937/1666666 [1:11:11<1:20:57, 212.20it/s]

finished frames 3815400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 636024/1666666 [1:11:11<1:23:03, 206.79it/s]

finished frames 3816000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 636133/1666666 [1:11:11<1:21:49, 209.89it/s]

finished frames 3816600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 636218/1666666 [1:11:12<1:30:36, 189.53it/s]

finished frames 3817200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 636324/1666666 [1:11:12<1:24:39, 202.86it/s]

finished frames 3817800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 636429/1666666 [1:11:13<1:23:07, 206.57it/s]

finished frames 3818400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 636534/1666666 [1:11:13<1:22:51, 207.21it/s]

finished frames 3819000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 636643/1666666 [1:11:14<1:20:53, 212.21it/s]

finished frames 3819600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 636731/1666666 [1:11:14<1:20:24, 213.50it/s]

finished frames 3820200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 636841/1666666 [1:11:15<1:20:14, 213.88it/s]

finished frames 3820800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 636929/1666666 [1:11:15<1:20:07, 214.20it/s]

finished frames 3821400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 637039/1666666 [1:11:16<1:22:02, 209.18it/s]

finished frames 3822000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 637126/1666666 [1:11:16<1:20:33, 213.02it/s]

finished frames 3822600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 637236/1666666 [1:11:17<1:20:11, 213.96it/s]

finished frames 3823200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 637324/1666666 [1:11:17<1:20:07, 214.13it/s]

finished frames 3823800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 637434/1666666 [1:11:18<1:20:21, 213.48it/s]

finished frames 3824400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 637544/1666666 [1:11:18<1:20:02, 214.29it/s]

finished frames 3825000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 637632/1666666 [1:11:19<1:20:09, 213.94it/s]

finished frames 3825600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 637742/1666666 [1:11:19<1:20:05, 214.13it/s]

finished frames 3826200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 637830/1666666 [1:11:20<1:20:06, 214.06it/s]

finished frames 3826800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 637940/1666666 [1:11:20<1:20:10, 213.83it/s]

finished frames 3827400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 638028/1666666 [1:11:20<1:21:55, 209.28it/s]

finished frames 3828000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 638138/1666666 [1:11:21<1:20:19, 213.42it/s]

finished frames 3828600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 638226/1666666 [1:11:21<1:20:02, 214.14it/s]

finished frames 3829200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 638336/1666666 [1:11:22<1:20:01, 214.16it/s]

finished frames 3829800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 638424/1666666 [1:11:22<1:20:08, 213.84it/s]

finished frames 3830400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 638534/1666666 [1:11:23<1:19:46, 214.79it/s]

finished frames 3831000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 638644/1666666 [1:11:23<1:23:32, 205.11it/s]

finished frames 3831600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 638731/1666666 [1:11:24<1:21:00, 211.48it/s]

finished frames 3832200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 638841/1666666 [1:11:24<1:20:48, 212.00it/s]

finished frames 3832800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 638927/1666666 [1:11:25<1:22:06, 208.63it/s]

finished frames 3833400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 639032/1666666 [1:11:25<1:23:52, 204.18it/s]

finished frames 3834000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 639137/1666666 [1:11:26<1:22:19, 208.03it/s]

finished frames 3834600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 639243/1666666 [1:11:26<1:21:59, 208.83it/s]

finished frames 3835200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 639327/1666666 [1:11:27<1:22:27, 207.63it/s]

finished frames 3835800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 639433/1666666 [1:11:27<1:21:53, 209.04it/s]

finished frames 3836400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 639539/1666666 [1:11:28<1:21:48, 209.24it/s]

finished frames 3837000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 639624/1666666 [1:11:28<1:21:46, 209.31it/s]

finished frames 3837600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 639730/1666666 [1:11:29<1:21:52, 209.04it/s]

finished frames 3838200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 639836/1666666 [1:11:29<1:22:01, 208.66it/s]

finished frames 3838800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 639923/1666666 [1:11:30<1:20:49, 211.71it/s]

finished frames 3839400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 640033/1666666 [1:11:30<1:22:03, 208.53it/s]

finished frames 3840000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 640142/1666666 [1:11:31<1:20:43, 211.94it/s]

finished frames 3840600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 640230/1666666 [1:11:31<1:20:28, 212.56it/s]

finished frames 3841200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 640340/1666666 [1:11:31<1:20:36, 212.20it/s]

finished frames 3841800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 640428/1666666 [1:11:32<1:20:15, 213.11it/s]

finished frames 3842400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 640538/1666666 [1:11:32<1:20:21, 212.82it/s]

finished frames 3843000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 640626/1666666 [1:11:33<1:20:18, 212.94it/s]

finished frames 3843600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 640736/1666666 [1:11:33<1:20:17, 212.95it/s]

finished frames 3844200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 640824/1666666 [1:11:34<1:20:10, 213.26it/s]

finished frames 3844800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 640934/1666666 [1:11:34<1:20:11, 213.20it/s]

finished frames 3845400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 641021/1666666 [1:11:35<1:27:18, 195.80it/s]

finished frames 3846000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 641129/1666666 [1:11:35<1:25:07, 200.79it/s]

finished frames 3846600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 641238/1666666 [1:11:36<1:21:03, 210.84it/s]

finished frames 3847200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 641326/1666666 [1:11:36<1:20:27, 212.41it/s]

finished frames 3847800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 641436/1666666 [1:11:37<1:20:21, 212.65it/s]

finished frames 3848400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 641524/1666666 [1:11:37<1:20:04, 213.37it/s]

finished frames 3849000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 38%|███▊      | 641634/1666666 [1:11:38<1:20:11, 213.03it/s]

finished frames 3849600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▊      | 641744/1666666 [1:11:38<1:20:11, 213.02it/s]

finished frames 3850200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▊      | 641832/1666666 [1:11:39<1:20:04, 213.31it/s]

finished frames 3850800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▊      | 641942/1666666 [1:11:39<1:19:58, 213.54it/s]

finished frames 3851400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▊      | 642030/1666666 [1:11:40<1:21:58, 208.34it/s]

finished frames 3852000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▊      | 642139/1666666 [1:11:40<1:20:31, 212.03it/s]

finished frames 3852600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▊      | 642227/1666666 [1:11:40<1:20:19, 212.55it/s]

finished frames 3853200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▊      | 642337/1666666 [1:11:41<1:20:04, 213.22it/s]

finished frames 3853800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▊      | 642425/1666666 [1:11:41<1:20:16, 212.66it/s]

finished frames 3854400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▊      | 642535/1666666 [1:11:42<1:20:05, 213.11it/s]

finished frames 3855000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▊      | 642623/1666666 [1:11:42<1:20:03, 213.21it/s]

finished frames 3855600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▊      | 642733/1666666 [1:11:43<1:20:00, 213.29it/s]

finished frames 3856200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▊      | 642843/1666666 [1:11:43<1:19:58, 213.35it/s]

finished frames 3856800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▊      | 642931/1666666 [1:11:44<1:20:08, 212.91it/s]

finished frames 3857400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▊      | 643041/1666666 [1:11:44<1:21:31, 209.26it/s]

finished frames 3858000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▊      | 643128/1666666 [1:11:45<1:20:29, 211.93it/s]

finished frames 3858600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▊      | 643238/1666666 [1:11:45<1:19:49, 213.67it/s]

finished frames 3859200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▊      | 643326/1666666 [1:11:46<1:19:54, 213.43it/s]

finished frames 3859800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▊      | 643436/1666666 [1:11:46<1:19:56, 213.35it/s]

finished frames 3860400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▊      | 643524/1666666 [1:11:47<1:23:44, 203.65it/s]

finished frames 3861000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▊      | 643633/1666666 [1:11:47<1:20:23, 212.07it/s]

finished frames 3861600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▊      | 643743/1666666 [1:11:48<1:19:44, 213.81it/s]

finished frames 3862200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▊      | 643831/1666666 [1:11:48<1:19:41, 213.94it/s]

finished frames 3862800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▊      | 643941/1666666 [1:11:49<1:20:02, 212.97it/s]

finished frames 3863400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▊      | 644029/1666666 [1:11:49<1:22:04, 207.68it/s]

finished frames 3864000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▊      | 644138/1666666 [1:11:50<1:20:33, 211.54it/s]

finished frames 3864600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▊      | 644226/1666666 [1:11:50<1:20:33, 211.53it/s]

finished frames 3865200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▊      | 644336/1666666 [1:11:50<1:20:19, 212.14it/s]

finished frames 3865800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▊      | 644424/1666666 [1:11:51<1:20:13, 212.37it/s]

finished frames 3866400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▊      | 644534/1666666 [1:11:51<1:20:03, 212.79it/s]

finished frames 3867000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▊      | 644622/1666666 [1:11:52<1:20:07, 212.58it/s]

finished frames 3867600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▊      | 644732/1666666 [1:11:52<1:20:04, 212.71it/s]

finished frames 3868200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▊      | 644842/1666666 [1:11:53<1:19:57, 212.99it/s]

finished frames 3868800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▊      | 644930/1666666 [1:11:53<1:20:22, 211.87it/s]

finished frames 3869400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▊      | 645040/1666666 [1:11:54<1:21:05, 209.98it/s]

finished frames 3870000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▊      | 645128/1666666 [1:11:54<1:19:26, 214.31it/s]

finished frames 3870600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▊      | 645238/1666666 [1:11:55<1:20:41, 210.99it/s]

finished frames 3871200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▊      | 645326/1666666 [1:11:55<1:20:26, 211.63it/s]

finished frames 3871800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▊      | 645440/1666666 [1:11:56<1:17:25, 219.85it/s]

finished frames 3872400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▊      | 645529/1666666 [1:11:56<1:17:53, 218.51it/s]

finished frames 3873000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▊      | 645640/1666666 [1:11:57<1:20:33, 211.23it/s]

finished frames 3873600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▊      | 645725/1666666 [1:11:57<1:32:30, 183.95it/s]

finished frames 3874200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▊      | 645832/1666666 [1:11:58<1:22:06, 207.22it/s]

finished frames 3874800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 645942/1666666 [1:11:58<1:19:58, 212.71it/s]

finished frames 3875400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 646030/1666666 [1:11:58<1:21:19, 209.17it/s]

finished frames 3876000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 646138/1666666 [1:11:59<1:20:23, 211.56it/s]

finished frames 3876600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 646226/1666666 [1:11:59<1:19:41, 213.42it/s]

finished frames 3877200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 646336/1666666 [1:12:00<1:20:31, 211.20it/s]

finished frames 3877800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 646424/1666666 [1:12:00<1:20:39, 210.82it/s]

finished frames 3878400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 646534/1666666 [1:12:01<1:20:57, 210.01it/s]

finished frames 3879000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 646640/1666666 [1:12:01<1:21:35, 208.35it/s]

finished frames 3879600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 646724/1666666 [1:12:02<1:21:51, 207.66it/s]

finished frames 3880200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 646829/1666666 [1:12:02<1:21:49, 207.75it/s]

finished frames 3880800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 646935/1666666 [1:12:03<1:21:46, 207.84it/s]

finished frames 3881400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 647040/1666666 [1:12:03<1:23:43, 202.96it/s]

finished frames 3882000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 647124/1666666 [1:12:04<1:22:23, 206.24it/s]

finished frames 3882600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 647229/1666666 [1:12:04<1:21:57, 207.30it/s]

finished frames 3883200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 647334/1666666 [1:12:05<1:21:50, 207.58it/s]

finished frames 3883800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 647439/1666666 [1:12:05<1:21:46, 207.74it/s]

finished frames 3884400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 647523/1666666 [1:12:06<1:22:01, 207.07it/s]

finished frames 3885000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 647628/1666666 [1:12:06<1:21:54, 207.36it/s]

finished frames 3885600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 647733/1666666 [1:12:07<1:21:47, 207.64it/s]

finished frames 3886200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 647841/1666666 [1:12:07<1:21:16, 208.92it/s]

finished frames 3886800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 647928/1666666 [1:12:08<1:19:55, 212.45it/s]

finished frames 3887400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 648037/1666666 [1:12:08<1:21:51, 207.39it/s]

finished frames 3888000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 648123/1666666 [1:12:09<1:23:38, 202.95it/s]

finished frames 3888600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 648231/1666666 [1:12:09<1:23:29, 203.29it/s]

finished frames 3889200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 648340/1666666 [1:12:10<1:20:08, 211.79it/s]

finished frames 3889800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 648428/1666666 [1:12:10<1:19:21, 213.85it/s]

finished frames 3890400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 648537/1666666 [1:12:11<1:20:03, 211.96it/s]

finished frames 3891000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 648625/1666666 [1:12:11<1:19:19, 213.88it/s]

finished frames 3891600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 648735/1666666 [1:12:11<1:19:13, 214.15it/s]

finished frames 3892200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 648823/1666666 [1:12:12<1:19:19, 213.87it/s]

finished frames 3892800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 648933/1666666 [1:12:12<1:19:12, 214.14it/s]

finished frames 3893400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 649021/1666666 [1:12:13<1:21:25, 208.29it/s]

finished frames 3894000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 649131/1666666 [1:12:13<1:19:36, 213.03it/s]

finished frames 3894600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 649241/1666666 [1:12:14<1:19:08, 214.26it/s]

finished frames 3895200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 649329/1666666 [1:12:14<1:19:07, 214.31it/s]

finished frames 3895800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 649439/1666666 [1:12:15<1:19:33, 213.09it/s]

finished frames 3896400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 649527/1666666 [1:12:15<1:19:31, 213.15it/s]

finished frames 3897000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 649637/1666666 [1:12:16<1:19:27, 213.32it/s]

finished frames 3897600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 649725/1666666 [1:12:16<1:19:20, 213.61it/s]

finished frames 3898200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 649835/1666666 [1:12:17<1:19:15, 213.84it/s]

finished frames 3898800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 649923/1666666 [1:12:17<1:19:28, 213.20it/s]

finished frames 3899400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 650033/1666666 [1:12:18<1:21:20, 208.32it/s]

finished frames 3900000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 650141/1666666 [1:12:18<1:20:22, 210.77it/s]

finished frames 3900600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 650229/1666666 [1:12:19<1:20:33, 210.27it/s]

finished frames 3901200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 650335/1666666 [1:12:19<1:21:11, 208.61it/s]

finished frames 3901800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 650419/1666666 [1:12:19<1:21:28, 207.90it/s]

finished frames 3902400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 650527/1666666 [1:12:20<1:21:42, 207.27it/s]

finished frames 3903000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 650635/1666666 [1:12:21<1:23:28, 202.86it/s]

finished frames 3903600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 650743/1666666 [1:12:21<1:20:26, 210.49it/s]

finished frames 3904200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 650831/1666666 [1:12:21<1:19:24, 213.21it/s]

finished frames 3904800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 650941/1666666 [1:12:22<1:19:08, 213.91it/s]

finished frames 3905400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 651029/1666666 [1:12:22<1:20:59, 209.00it/s]

finished frames 3906000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 651139/1666666 [1:12:23<1:19:17, 213.47it/s]

finished frames 3906600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 651227/1666666 [1:12:23<1:18:50, 214.66it/s]

finished frames 3907200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 651337/1666666 [1:12:24<1:19:07, 213.86it/s]

finished frames 3907800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 651425/1666666 [1:12:24<1:19:13, 213.58it/s]

finished frames 3908400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 651535/1666666 [1:12:25<1:19:01, 214.11it/s]

finished frames 3909000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 651623/1666666 [1:12:25<1:18:53, 214.46it/s]

finished frames 3909600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 651733/1666666 [1:12:26<1:19:06, 213.81it/s]

finished frames 3910200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 651843/1666666 [1:12:26<1:20:00, 211.40it/s]

finished frames 3910800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 651931/1666666 [1:12:27<1:20:14, 210.77it/s]

finished frames 3911400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 652040/1666666 [1:12:27<1:22:00, 206.19it/s]

finished frames 3912000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 652127/1666666 [1:12:28<1:20:25, 210.26it/s]

finished frames 3912600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 652237/1666666 [1:12:28<1:19:59, 211.38it/s]

finished frames 3913200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 652325/1666666 [1:12:29<1:19:57, 211.41it/s]

finished frames 3913800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 652435/1666666 [1:12:29<1:19:55, 211.48it/s]

finished frames 3914400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 652523/1666666 [1:12:29<1:19:55, 211.48it/s]

finished frames 3915000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 652633/1666666 [1:12:30<1:19:58, 211.34it/s]

finished frames 3915600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 652743/1666666 [1:12:30<1:20:14, 210.59it/s]

finished frames 3916200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 652831/1666666 [1:12:31<1:20:28, 209.97it/s]

finished frames 3916800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 652917/1666666 [1:12:31<1:20:16, 210.47it/s]

finished frames 3917400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 653023/1666666 [1:12:32<1:25:43, 197.08it/s]

finished frames 3918000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 653129/1666666 [1:12:32<1:21:33, 207.13it/s]

finished frames 3918600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 653237/1666666 [1:12:33<1:20:36, 209.54it/s]

finished frames 3919200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 653324/1666666 [1:12:33<1:19:47, 211.65it/s]

finished frames 3919800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 653434/1666666 [1:12:34<1:19:15, 213.07it/s]

finished frames 3920400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 653544/1666666 [1:12:34<1:19:06, 213.45it/s]

finished frames 3921000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 653632/1666666 [1:12:35<1:18:58, 213.80it/s]

finished frames 3921600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 653742/1666666 [1:12:35<1:19:10, 213.24it/s]

finished frames 3922200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 653830/1666666 [1:12:36<1:19:04, 213.45it/s]

finished frames 3922800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 653940/1666666 [1:12:36<1:19:00, 213.62it/s]

finished frames 3923400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 654028/1666666 [1:12:37<1:20:47, 208.91it/s]

finished frames 3924000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 654137/1666666 [1:12:37<1:19:04, 213.42it/s]

finished frames 3924600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 654225/1666666 [1:12:38<1:18:44, 214.31it/s]

finished frames 3925200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 654335/1666666 [1:12:38<1:18:37, 214.57it/s]

finished frames 3925800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 654423/1666666 [1:12:39<1:18:59, 213.56it/s]

finished frames 3926400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 654533/1666666 [1:12:39<1:18:39, 214.48it/s]

finished frames 3927000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 654643/1666666 [1:12:40<1:18:32, 214.73it/s]

finished frames 3927600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 654731/1666666 [1:12:40<1:18:46, 214.08it/s]

finished frames 3928200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 654841/1666666 [1:12:40<1:18:43, 214.21it/s]

finished frames 3928800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 654929/1666666 [1:12:41<1:18:50, 213.87it/s]

finished frames 3929400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 655039/1666666 [1:12:41<1:20:24, 209.70it/s]

finished frames 3930000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 655127/1666666 [1:12:42<1:19:09, 212.98it/s]

finished frames 3930600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 655237/1666666 [1:12:42<1:22:15, 204.91it/s]

finished frames 3931200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 655325/1666666 [1:12:43<1:19:52, 211.01it/s]

finished frames 3931800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 655435/1666666 [1:12:43<1:19:07, 212.99it/s]

finished frames 3932400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 655523/1666666 [1:12:44<1:18:51, 213.72it/s]

finished frames 3933000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 655633/1666666 [1:12:44<1:18:58, 213.36it/s]

finished frames 3933600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 655743/1666666 [1:12:45<1:18:46, 213.90it/s]

finished frames 3934200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 655831/1666666 [1:12:45<1:19:12, 212.71it/s]

finished frames 3934800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 655941/1666666 [1:12:46<1:19:02, 213.13it/s]

finished frames 3935400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 656029/1666666 [1:12:46<1:20:49, 208.41it/s]

finished frames 3936000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 656138/1666666 [1:12:47<1:19:45, 211.18it/s]

finished frames 3936600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 656226/1666666 [1:12:47<1:19:02, 213.08it/s]

finished frames 3937200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 656336/1666666 [1:12:48<1:18:51, 213.52it/s]

finished frames 3937800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 656424/1666666 [1:12:48<1:18:50, 213.58it/s]

finished frames 3938400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 656534/1666666 [1:12:48<1:18:50, 213.52it/s]

finished frames 3939000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 656644/1666666 [1:12:49<1:18:57, 213.21it/s]

finished frames 3939600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 656732/1666666 [1:12:49<1:18:46, 213.66it/s]

finished frames 3940200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 656842/1666666 [1:12:50<1:18:41, 213.87it/s]

finished frames 3940800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 656930/1666666 [1:12:50<1:18:49, 213.51it/s]

finished frames 3941400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 657040/1666666 [1:12:51<1:20:38, 208.65it/s]

finished frames 3942000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 657127/1666666 [1:12:51<1:19:16, 212.23it/s]

finished frames 3942600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 657237/1666666 [1:12:52<1:18:53, 213.27it/s]

finished frames 3943200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 657323/1666666 [1:12:52<1:20:39, 208.58it/s]

finished frames 3943800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 657428/1666666 [1:12:53<1:20:35, 208.70it/s]

finished frames 3944400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 657534/1666666 [1:12:53<1:20:27, 209.03it/s]

finished frames 3945000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 657640/1666666 [1:12:54<1:20:24, 209.13it/s]

finished frames 3945600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 657724/1666666 [1:12:54<1:25:35, 196.48it/s]

finished frames 3946200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 657829/1666666 [1:12:55<1:21:25, 206.52it/s]

finished frames 3946800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 657934/1666666 [1:12:55<1:20:45, 208.16it/s]

finished frames 3947400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 658039/1666666 [1:12:56<1:22:31, 203.70it/s]

finished frames 3948000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 658123/1666666 [1:12:56<1:21:04, 207.35it/s]

finished frames 3948600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 39%|███▉      | 658229/1666666 [1:12:57<1:20:40, 208.32it/s]

finished frames 3949200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 658334/1666666 [1:12:57<1:20:42, 208.21it/s]

finished frames 3949800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 658439/1666666 [1:12:58<1:20:43, 208.18it/s]

finished frames 3950400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 658523/1666666 [1:12:58<1:20:45, 208.06it/s]

finished frames 3951000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 658628/1666666 [1:12:59<1:20:46, 208.00it/s]

finished frames 3951600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 658733/1666666 [1:12:59<1:20:31, 208.60it/s]

finished frames 3952200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 658839/1666666 [1:13:00<1:20:41, 208.16it/s]

finished frames 3952800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 658924/1666666 [1:13:00<1:20:36, 208.34it/s]

finished frames 3953400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 659030/1666666 [1:13:00<1:21:53, 205.08it/s]

finished frames 3954000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 659138/1666666 [1:13:01<1:20:02, 209.80it/s]

finished frames 3954600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 659224/1666666 [1:13:01<1:19:53, 210.18it/s]

finished frames 3955200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 659332/1666666 [1:13:02<1:20:11, 209.35it/s]

finished frames 3955800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 659439/1666666 [1:13:02<1:20:03, 209.69it/s]

finished frames 3956400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 659524/1666666 [1:13:03<1:20:09, 209.42it/s]

finished frames 3957000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 659630/1666666 [1:13:03<1:20:21, 208.85it/s]

finished frames 3957600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 659737/1666666 [1:13:04<1:20:06, 209.50it/s]

finished frames 3958200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 659842/1666666 [1:13:04<1:20:28, 208.52it/s]

finished frames 3958800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 659927/1666666 [1:13:05<1:20:31, 208.39it/s]

finished frames 3959400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 660032/1666666 [1:13:05<1:29:10, 188.16it/s]

finished frames 3960000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 660137/1666666 [1:13:06<1:21:57, 204.68it/s]

finished frames 3960600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 660242/1666666 [1:13:06<1:20:40, 207.90it/s]

finished frames 3961200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 660326/1666666 [1:13:07<1:20:31, 208.27it/s]

finished frames 3961800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 660431/1666666 [1:13:07<1:20:41, 207.82it/s]

finished frames 3962400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 660536/1666666 [1:13:08<1:20:33, 208.16it/s]

finished frames 3963000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 660642/1666666 [1:13:08<1:20:14, 208.94it/s]

finished frames 3963600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 660727/1666666 [1:13:09<1:20:26, 208.41it/s]

finished frames 3964200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 660833/1666666 [1:13:09<1:20:17, 208.81it/s]

finished frames 3964800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 660938/1666666 [1:13:10<1:20:30, 208.22it/s]

finished frames 3965400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 661022/1666666 [1:13:10<1:22:44, 202.56it/s]

finished frames 3966000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 661127/1666666 [1:13:11<1:20:50, 207.30it/s]

finished frames 3966600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 661232/1666666 [1:13:11<1:20:33, 208.04it/s]

finished frames 3967200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 661337/1666666 [1:13:12<1:20:18, 208.65it/s]

finished frames 3967800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 661422/1666666 [1:13:12<1:20:17, 208.66it/s]

finished frames 3968400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 661529/1666666 [1:13:13<1:20:05, 209.17it/s]

finished frames 3969000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 661635/1666666 [1:13:13<1:20:09, 208.97it/s]

finished frames 3969600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 661742/1666666 [1:13:14<1:19:59, 209.39it/s]

finished frames 3970200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 661826/1666666 [1:13:14<1:20:16, 208.63it/s]

finished frames 3970800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 661934/1666666 [1:13:14<1:19:58, 209.40it/s]

finished frames 3971400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 662041/1666666 [1:13:15<1:21:42, 204.92it/s]

finished frames 3972000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 662127/1666666 [1:13:15<1:20:28, 208.05it/s]

finished frames 3972600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 662233/1666666 [1:13:16<1:20:05, 209.03it/s]

finished frames 3973200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 662340/1666666 [1:13:16<1:19:49, 209.71it/s]

finished frames 3973800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 662425/1666666 [1:13:17<1:22:28, 202.92it/s]

finished frames 3974400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 662534/1666666 [1:13:17<1:20:07, 208.86it/s]

finished frames 3975000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 662639/1666666 [1:13:18<1:20:22, 208.18it/s]

finished frames 3975600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 662725/1666666 [1:13:18<1:20:09, 208.74it/s]

finished frames 3976200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 662830/1666666 [1:13:19<1:20:11, 208.65it/s]

finished frames 3976800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 662938/1666666 [1:13:19<1:19:39, 210.01it/s]

finished frames 3977400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 663024/1666666 [1:13:20<1:21:33, 205.11it/s]

finished frames 3978000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 663130/1666666 [1:13:20<1:20:19, 208.21it/s]

finished frames 3978600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 663237/1666666 [1:13:21<1:19:43, 209.77it/s]

finished frames 3979200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 663324/1666666 [1:13:21<1:19:39, 209.91it/s]

finished frames 3979800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 663429/1666666 [1:13:22<1:19:56, 209.14it/s]

finished frames 3980400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 663534/1666666 [1:13:22<1:19:56, 209.14it/s]

finished frames 3981000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 663642/1666666 [1:13:23<1:19:35, 210.02it/s]

finished frames 3981600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 663729/1666666 [1:13:23<1:19:39, 209.85it/s]

finished frames 3982200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 663837/1666666 [1:13:24<1:19:32, 210.13it/s]

finished frames 3982800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 663923/1666666 [1:13:24<1:19:51, 209.26it/s]

finished frames 3983400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 664029/1666666 [1:13:25<1:21:43, 204.49it/s]

finished frames 3984000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 664134/1666666 [1:13:25<1:20:16, 208.16it/s]

finished frames 3984600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 664239/1666666 [1:13:26<1:20:20, 207.96it/s]

finished frames 3985200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 664323/1666666 [1:13:26<1:20:14, 208.20it/s]

finished frames 3985800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 664429/1666666 [1:13:26<1:19:55, 209.00it/s]

finished frames 3986400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 664537/1666666 [1:13:27<1:19:40, 209.62it/s]

finished frames 3987000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 664623/1666666 [1:13:27<1:19:43, 209.47it/s]

finished frames 3987600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 664728/1666666 [1:13:28<1:19:55, 208.93it/s]

finished frames 3988200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 664835/1666666 [1:13:28<1:19:29, 210.07it/s]

finished frames 3988800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 664922/1666666 [1:13:29<1:19:36, 209.72it/s]

finished frames 3989400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 665028/1666666 [1:13:29<1:21:38, 204.47it/s]

finished frames 3990000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 665134/1666666 [1:13:30<1:20:02, 208.55it/s]

finished frames 3990600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 665241/1666666 [1:13:30<1:19:41, 209.42it/s]

finished frames 3991200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 665327/1666666 [1:13:31<1:19:40, 209.47it/s]

finished frames 3991800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 665433/1666666 [1:13:31<1:19:35, 209.66it/s]

finished frames 3992400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 665541/1666666 [1:13:32<1:19:27, 210.00it/s]

finished frames 3993000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 665627/1666666 [1:13:32<1:19:32, 209.76it/s]

finished frames 3993600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 665733/1666666 [1:13:33<1:19:29, 209.84it/s]

finished frames 3994200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 665841/1666666 [1:13:33<1:19:32, 209.72it/s]

finished frames 3994800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 665927/1666666 [1:13:34<1:19:37, 209.45it/s]

finished frames 3995400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 666032/1666666 [1:13:34<1:21:31, 204.57it/s]

finished frames 3996000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 666140/1666666 [1:13:35<1:19:54, 208.67it/s]

finished frames 3996600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 666225/1666666 [1:13:35<1:19:40, 209.27it/s]

finished frames 3997200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 666331/1666666 [1:13:36<1:19:42, 209.15it/s]

finished frames 3997800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 666436/1666666 [1:13:36<1:19:53, 208.66it/s]

finished frames 3998400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 666541/1666666 [1:13:37<1:19:46, 208.95it/s]

finished frames 3999000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|███▉      | 666625/1666666 [1:13:37<1:19:50, 208.75it/s]

finished frames 3999600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 666731/1666666 [1:13:38<1:19:37, 209.29it/s]

finished frames 4000200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 666837/1666666 [1:13:38<1:19:39, 209.19it/s]

finished frames 4000800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 666922/1666666 [1:13:38<1:19:35, 209.34it/s]

finished frames 4001400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 667027/1666666 [1:13:39<1:24:45, 196.56it/s]

finished frames 4002000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 667132/1666666 [1:13:40<1:22:18, 202.41it/s]

finished frames 4002600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 667237/1666666 [1:13:40<1:20:34, 206.74it/s]

finished frames 4003200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 667342/1666666 [1:13:41<1:19:49, 208.63it/s]

finished frames 4003800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 667426/1666666 [1:13:41<1:20:05, 207.96it/s]

finished frames 4004400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 667531/1666666 [1:13:41<1:20:03, 207.99it/s]

finished frames 4005000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 667636/1666666 [1:13:42<1:19:57, 208.23it/s]

finished frames 4005600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 667742/1666666 [1:13:42<1:19:47, 208.66it/s]

finished frames 4006200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 667826/1666666 [1:13:43<1:19:54, 208.33it/s]

finished frames 4006800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 667931/1666666 [1:13:43<1:20:01, 208.01it/s]

finished frames 4007400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 668036/1666666 [1:13:44<1:21:32, 204.12it/s]

finished frames 4008000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 668141/1666666 [1:13:44<1:20:17, 207.29it/s]

finished frames 4008600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 668225/1666666 [1:13:45<1:19:57, 208.11it/s]

finished frames 4009200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 668331/1666666 [1:13:45<1:19:43, 208.71it/s]

finished frames 4009800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 668436/1666666 [1:13:46<1:19:54, 208.21it/s]

finished frames 4010400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 668541/1666666 [1:13:46<1:19:52, 208.28it/s]

finished frames 4011000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 668625/1666666 [1:13:47<1:20:30, 206.61it/s]

finished frames 4011600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 668730/1666666 [1:13:47<1:20:01, 207.82it/s]

finished frames 4012200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 668835/1666666 [1:13:48<1:19:58, 207.96it/s]

finished frames 4012800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 668940/1666666 [1:13:48<1:19:50, 208.29it/s]

finished frames 4013400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 669024/1666666 [1:13:49<1:21:34, 203.85it/s]

finished frames 4014000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 669129/1666666 [1:13:49<1:20:19, 207.00it/s]

finished frames 4014600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 669235/1666666 [1:13:50<1:19:34, 208.91it/s]

finished frames 4015200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 669341/1666666 [1:13:50<1:21:56, 202.86it/s]

finished frames 4015800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 669426/1666666 [1:13:51<1:23:38, 198.72it/s]

finished frames 4016400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 669534/1666666 [1:13:51<1:20:08, 207.36it/s]

finished frames 4017000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 669642/1666666 [1:13:52<1:19:11, 209.83it/s]

finished frames 4017600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 669727/1666666 [1:13:52<1:19:18, 209.50it/s]

finished frames 4018200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 669834/1666666 [1:13:53<1:19:19, 209.45it/s]

finished frames 4018800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 669940/1666666 [1:13:53<1:20:36, 206.10it/s]

finished frames 4019400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 670026/1666666 [1:13:54<1:20:02, 207.52it/s]

finished frames 4020000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 670135/1666666 [1:13:54<1:19:10, 209.76it/s]

finished frames 4020600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 670223/1666666 [1:13:54<1:19:24, 209.13it/s]

finished frames 4021200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 670330/1666666 [1:13:55<1:19:18, 209.39it/s]

finished frames 4021800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 670441/1666666 [1:13:56<1:16:34, 216.81it/s]

finished frames 4022400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 670530/1666666 [1:13:56<1:17:25, 214.45it/s]

finished frames 4023000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 670642/1666666 [1:13:56<1:17:03, 215.41it/s]

finished frames 4023600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 670730/1666666 [1:13:57<1:18:53, 210.41it/s]

finished frames 4024200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 670838/1666666 [1:13:57<1:18:49, 210.57it/s]

finished frames 4024800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 670926/1666666 [1:13:58<1:17:58, 212.83it/s]

finished frames 4025400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 671036/1666666 [1:13:58<1:19:17, 209.27it/s]

finished frames 4026000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 671123/1666666 [1:13:59<1:19:46, 207.97it/s]

finished frames 4026600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 671231/1666666 [1:13:59<1:18:02, 212.60it/s]

finished frames 4027200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 671341/1666666 [1:14:00<1:17:20, 214.49it/s]

finished frames 4027800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 671429/1666666 [1:14:00<1:17:19, 214.50it/s]

finished frames 4028400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 671539/1666666 [1:14:01<1:17:07, 215.05it/s]

finished frames 4029000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 671627/1666666 [1:14:01<1:24:09, 197.07it/s]

finished frames 4029600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 671737/1666666 [1:14:02<1:18:16, 211.86it/s]

finished frames 4030200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 671825/1666666 [1:14:02<1:17:28, 214.01it/s]

finished frames 4030800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 671935/1666666 [1:14:03<1:17:28, 214.01it/s]

finished frames 4031400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 672022/1666666 [1:14:03<1:19:24, 208.77it/s]

finished frames 4032000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 672131/1666666 [1:14:04<1:18:35, 210.93it/s]

finished frames 4032600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 672241/1666666 [1:14:04<1:18:29, 211.14it/s]

finished frames 4033200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 672329/1666666 [1:14:04<1:18:05, 212.23it/s]

finished frames 4033800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 672439/1666666 [1:14:05<1:18:20, 211.53it/s]

finished frames 4034400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 672527/1666666 [1:14:05<1:18:20, 211.48it/s]

finished frames 4035000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 672637/1666666 [1:14:06<1:18:12, 211.84it/s]

finished frames 4035600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 672725/1666666 [1:14:06<1:18:18, 211.54it/s]

finished frames 4036200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 672835/1666666 [1:14:07<1:18:18, 211.50it/s]

finished frames 4036800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 672923/1666666 [1:14:07<1:18:29, 211.02it/s]

finished frames 4037400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 673033/1666666 [1:14:08<1:19:55, 207.22it/s]

finished frames 4038000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 673142/1666666 [1:14:08<1:18:40, 210.46it/s]

finished frames 4038600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 673230/1666666 [1:14:09<1:18:28, 210.98it/s]

finished frames 4039200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 673340/1666666 [1:14:09<1:18:21, 211.29it/s]

finished frames 4039800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 673428/1666666 [1:14:10<1:18:19, 211.36it/s]

finished frames 4040400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 673538/1666666 [1:14:10<1:18:21, 211.23it/s]

finished frames 4041000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 673626/1666666 [1:14:11<1:18:27, 210.93it/s]

finished frames 4041600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 673736/1666666 [1:14:11<1:18:06, 211.87it/s]

finished frames 4042200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 673824/1666666 [1:14:12<1:18:14, 211.50it/s]

finished frames 4042800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 673934/1666666 [1:14:12<1:18:08, 211.72it/s]

finished frames 4043400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 674020/1666666 [1:14:13<1:25:28, 193.57it/s]

finished frames 4044000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 674128/1666666 [1:14:13<1:19:51, 207.17it/s]

finished frames 4044600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 674233/1666666 [1:14:14<1:19:37, 207.73it/s]

finished frames 4045200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 674338/1666666 [1:14:14<1:19:29, 208.06it/s]

finished frames 4045800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 674424/1666666 [1:14:14<1:19:12, 208.77it/s]

finished frames 4046400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 674530/1666666 [1:14:15<1:19:13, 208.70it/s]

finished frames 4047000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 674636/1666666 [1:14:15<1:19:09, 208.88it/s]

finished frames 4047600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 674742/1666666 [1:14:16<1:19:18, 208.46it/s]

finished frames 4048200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 674827/1666666 [1:14:16<1:19:22, 208.26it/s]

finished frames 4048800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 40%|████      | 674934/1666666 [1:14:17<1:19:05, 209.00it/s]

finished frames 4049400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 675039/1666666 [1:14:17<1:21:18, 203.28it/s]

finished frames 4050000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 675123/1666666 [1:14:18<1:19:52, 206.89it/s]

finished frames 4050600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 675228/1666666 [1:14:18<1:19:22, 208.19it/s]

finished frames 4051200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 675334/1666666 [1:14:19<1:19:08, 208.78it/s]

finished frames 4051800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 675439/1666666 [1:14:19<1:19:17, 208.35it/s]

finished frames 4052400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 675523/1666666 [1:14:20<1:19:22, 208.10it/s]

finished frames 4053000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 675630/1666666 [1:14:20<1:18:43, 209.81it/s]

finished frames 4053600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 675738/1666666 [1:14:21<1:18:41, 209.87it/s]

finished frames 4054200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 675823/1666666 [1:14:21<1:18:49, 209.50it/s]

finished frames 4054800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 675929/1666666 [1:14:22<1:19:03, 208.88it/s]

finished frames 4055400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 676035/1666666 [1:14:22<1:20:56, 203.99it/s]

finished frames 4056000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 676140/1666666 [1:14:23<1:19:39, 207.23it/s]

finished frames 4056600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 676224/1666666 [1:14:23<1:19:28, 207.72it/s]

finished frames 4057200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 676329/1666666 [1:14:24<1:26:13, 191.42it/s]

finished frames 4057800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 676434/1666666 [1:14:24<1:20:30, 205.01it/s]

finished frames 4058400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 676541/1666666 [1:14:25<1:19:22, 207.91it/s]

finished frames 4059000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 676627/1666666 [1:14:25<1:18:43, 209.60it/s]

finished frames 4059600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 676735/1666666 [1:14:26<1:18:15, 210.84it/s]

finished frames 4060200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 676823/1666666 [1:14:26<1:18:20, 210.56it/s]

finished frames 4060800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 676933/1666666 [1:14:27<1:18:15, 210.78it/s]

finished frames 4061400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 677020/1666666 [1:14:27<1:20:54, 203.85it/s]

finished frames 4062000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 677128/1666666 [1:14:27<1:18:42, 209.55it/s]

finished frames 4062600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 677236/1666666 [1:14:28<1:18:18, 210.60it/s]

finished frames 4063200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 677324/1666666 [1:14:28<1:18:12, 210.85it/s]

finished frames 4063800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 677434/1666666 [1:14:29<1:18:25, 210.25it/s]

finished frames 4064400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 677522/1666666 [1:14:29<1:18:21, 210.39it/s]

finished frames 4065000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 677632/1666666 [1:14:30<1:18:15, 210.65it/s]

finished frames 4065600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 677741/1666666 [1:14:30<1:18:24, 210.19it/s]

finished frames 4066200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 677828/1666666 [1:14:31<1:18:25, 210.13it/s]

finished frames 4066800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 677936/1666666 [1:14:31<1:18:31, 209.85it/s]

finished frames 4067400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 678021/1666666 [1:14:32<1:21:28, 202.23it/s]

finished frames 4068000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 678126/1666666 [1:14:32<1:19:21, 207.61it/s]

finished frames 4068600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 678233/1666666 [1:14:33<1:18:34, 209.68it/s]

finished frames 4069200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 678340/1666666 [1:14:33<1:18:34, 209.65it/s]

finished frames 4069800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 678426/1666666 [1:14:34<1:18:31, 209.75it/s]

finished frames 4070400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 678533/1666666 [1:14:34<1:18:32, 209.70it/s]

finished frames 4071000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 678641/1666666 [1:14:35<1:17:51, 211.51it/s]

finished frames 4071600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 678729/1666666 [1:14:35<1:23:05, 198.18it/s]

finished frames 4072200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 678838/1666666 [1:14:36<1:21:15, 202.63it/s]

finished frames 4072800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 678925/1666666 [1:14:36<1:18:00, 211.05it/s]

finished frames 4073400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 679035/1666666 [1:14:37<1:18:56, 208.52it/s]

finished frames 4074000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 679122/1666666 [1:14:37<1:18:09, 210.57it/s]

finished frames 4074600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 679232/1666666 [1:14:38<1:17:48, 211.51it/s]

finished frames 4075200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 679342/1666666 [1:14:38<1:17:57, 211.09it/s]

finished frames 4075800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 679430/1666666 [1:14:39<1:17:57, 211.04it/s]

finished frames 4076400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 679540/1666666 [1:14:39<1:17:38, 211.90it/s]

finished frames 4077000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 679628/1666666 [1:14:39<1:17:40, 211.79it/s]

finished frames 4077600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 679738/1666666 [1:14:40<1:17:38, 211.86it/s]

finished frames 4078200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 679826/1666666 [1:14:40<1:17:53, 211.14it/s]

finished frames 4078800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 679936/1666666 [1:14:41<1:17:53, 211.15it/s]

finished frames 4079400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 680024/1666666 [1:14:41<1:19:24, 207.09it/s]

finished frames 4080000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 680133/1666666 [1:14:42<1:18:19, 209.91it/s]

finished frames 4080600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 680241/1666666 [1:14:42<1:17:50, 211.20it/s]

finished frames 4081200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 680329/1666666 [1:14:43<1:17:46, 211.37it/s]

finished frames 4081800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 680439/1666666 [1:14:43<1:17:50, 211.17it/s]

finished frames 4082400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 680527/1666666 [1:14:44<1:17:45, 211.38it/s]

finished frames 4083000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 680637/1666666 [1:14:44<1:17:37, 211.73it/s]

finished frames 4083600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 680725/1666666 [1:14:45<1:17:33, 211.87it/s]

finished frames 4084200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 680835/1666666 [1:14:45<1:17:33, 211.83it/s]

finished frames 4084800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 680923/1666666 [1:14:46<1:17:33, 211.81it/s]

finished frames 4085400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 681033/1666666 [1:14:46<1:19:12, 207.39it/s]

finished frames 4086000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 681139/1666666 [1:14:47<1:20:14, 204.71it/s]

finished frames 4086600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 681224/1666666 [1:14:47<1:19:42, 206.06it/s]

finished frames 4087200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 681331/1666666 [1:14:48<1:18:30, 209.17it/s]

finished frames 4087800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 681441/1666666 [1:14:48<1:17:13, 212.63it/s]

finished frames 4088400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 681529/1666666 [1:14:49<1:17:25, 212.07it/s]

finished frames 4089000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 681639/1666666 [1:14:49<1:17:08, 212.83it/s]

finished frames 4089600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 681727/1666666 [1:14:49<1:17:09, 212.75it/s]

finished frames 4090200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 681837/1666666 [1:14:50<1:16:52, 213.50it/s]

finished frames 4090800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 681925/1666666 [1:14:50<1:16:57, 213.28it/s]

finished frames 4091400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 682035/1666666 [1:14:51<1:18:46, 208.31it/s]

finished frames 4092000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 682144/1666666 [1:14:51<1:17:09, 212.64it/s]

finished frames 4092600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 682232/1666666 [1:14:52<1:16:11, 215.36it/s]

finished frames 4093200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 682342/1666666 [1:14:52<1:16:13, 215.24it/s]

finished frames 4093800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 682430/1666666 [1:14:53<1:15:38, 216.84it/s]

finished frames 4094400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 682540/1666666 [1:14:53<1:15:50, 216.25it/s]

finished frames 4095000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 682628/1666666 [1:14:54<1:15:49, 216.31it/s]

finished frames 4095600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 682738/1666666 [1:14:54<1:16:01, 215.69it/s]

finished frames 4096200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 682826/1666666 [1:14:55<1:15:48, 216.29it/s]

finished frames 4096800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 682936/1666666 [1:14:55<1:16:09, 215.28it/s]

finished frames 4097400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 683024/1666666 [1:14:56<1:17:32, 211.41it/s]

finished frames 4098000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 683134/1666666 [1:14:56<1:16:08, 215.29it/s]

finished frames 4098600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 683244/1666666 [1:14:57<1:15:33, 216.93it/s]

finished frames 4099200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 683332/1666666 [1:14:57<1:17:04, 212.65it/s]

finished frames 4099800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 683420/1666666 [1:14:57<1:16:30, 214.17it/s]

finished frames 4100400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 683530/1666666 [1:14:58<1:17:55, 210.28it/s]

finished frames 4101000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 683640/1666666 [1:14:58<1:16:45, 213.46it/s]

finished frames 4101600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 683728/1666666 [1:14:59<1:16:28, 214.22it/s]

finished frames 4102200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 683838/1666666 [1:14:59<1:16:24, 214.40it/s]

finished frames 4102800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 683926/1666666 [1:15:00<1:16:27, 214.22it/s]

finished frames 4103400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 684036/1666666 [1:15:00<1:18:14, 209.32it/s]

finished frames 4104000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 684123/1666666 [1:15:01<1:17:45, 210.58it/s]

finished frames 4104600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 684232/1666666 [1:15:01<1:17:56, 210.07it/s]

finished frames 4105200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 684339/1666666 [1:15:02<1:18:09, 209.47it/s]

finished frames 4105800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 684423/1666666 [1:15:02<1:18:41, 208.05it/s]

finished frames 4106400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 684529/1666666 [1:15:03<1:18:29, 208.55it/s]

finished frames 4107000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 684634/1666666 [1:15:03<1:18:30, 208.47it/s]

finished frames 4107600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 684741/1666666 [1:15:04<1:18:14, 209.19it/s]

finished frames 4108200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 684827/1666666 [1:15:04<1:18:07, 209.48it/s]

finished frames 4108800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 684934/1666666 [1:15:05<1:18:09, 209.37it/s]

finished frames 4109400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 685041/1666666 [1:15:05<1:19:43, 205.21it/s]

finished frames 4110000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 685125/1666666 [1:15:06<1:18:40, 207.94it/s]

finished frames 4110600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 685231/1666666 [1:15:06<1:18:31, 208.30it/s]

finished frames 4111200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 685336/1666666 [1:15:07<1:18:46, 207.63it/s]

finished frames 4111800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 685442/1666666 [1:15:07<1:18:15, 208.99it/s]

finished frames 4112400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 685526/1666666 [1:15:07<1:18:42, 207.76it/s]

finished frames 4113000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 685632/1666666 [1:15:08<1:18:29, 208.32it/s]

finished frames 4113600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 685739/1666666 [1:15:09<1:19:00, 206.94it/s]

finished frames 4114200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 685825/1666666 [1:15:09<1:18:16, 208.84it/s]

finished frames 4114800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 685930/1666666 [1:15:09<1:18:20, 208.63it/s]

finished frames 4115400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 686036/1666666 [1:15:10<1:19:29, 205.58it/s]

finished frames 4116000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 686141/1666666 [1:15:10<1:18:24, 208.41it/s]

finished frames 4116600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 686225/1666666 [1:15:11<1:18:26, 208.33it/s]

finished frames 4117200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 686332/1666666 [1:15:11<1:18:07, 209.13it/s]

finished frames 4117800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 686437/1666666 [1:15:12<1:18:24, 208.34it/s]

finished frames 4118400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 686543/1666666 [1:15:12<1:18:09, 208.99it/s]

finished frames 4119000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 686628/1666666 [1:15:13<1:18:12, 208.85it/s]

finished frames 4119600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 686735/1666666 [1:15:13<1:18:07, 209.04it/s]

finished frames 4120200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 686841/1666666 [1:15:14<1:18:11, 208.86it/s]

finished frames 4120800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 686925/1666666 [1:15:14<1:18:12, 208.78it/s]

finished frames 4121400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 687030/1666666 [1:15:15<1:19:56, 204.25it/s]

finished frames 4122000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 687135/1666666 [1:15:15<1:18:38, 207.60it/s]

finished frames 4122600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 687241/1666666 [1:15:16<1:18:16, 208.55it/s]

finished frames 4123200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 687325/1666666 [1:15:16<1:18:23, 208.21it/s]

finished frames 4123800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████      | 687430/1666666 [1:15:17<1:18:31, 207.84it/s]

finished frames 4124400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████▏     | 687535/1666666 [1:15:17<1:18:20, 208.29it/s]

finished frames 4125000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████▏     | 687641/1666666 [1:15:18<1:18:06, 208.92it/s]

finished frames 4125600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████▏     | 687728/1666666 [1:15:18<1:17:55, 209.37it/s]

finished frames 4126200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████▏     | 687833/1666666 [1:15:19<1:18:08, 208.77it/s]

finished frames 4126800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████▏     | 687941/1666666 [1:15:19<1:17:47, 209.68it/s]

finished frames 4127400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████▏     | 688025/1666666 [1:15:20<1:19:40, 204.71it/s]

finished frames 4128000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████▏     | 688130/1666666 [1:15:20<1:26:56, 187.57it/s]

finished frames 4128600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████▏     | 688237/1666666 [1:15:21<1:19:20, 205.53it/s]

finished frames 4129200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████▏     | 688342/1666666 [1:15:21<1:20:09, 203.43it/s]

finished frames 4129800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████▏     | 688426/1666666 [1:15:22<1:18:40, 207.21it/s]

finished frames 4130400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████▏     | 688531/1666666 [1:15:22<1:18:20, 208.11it/s]

finished frames 4131000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████▏     | 688637/1666666 [1:15:23<1:18:08, 208.60it/s]

finished frames 4131600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████▏     | 688742/1666666 [1:15:23<1:18:05, 208.73it/s]

finished frames 4132200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████▏     | 688826/1666666 [1:15:23<1:18:10, 208.46it/s]

finished frames 4132800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████▏     | 688931/1666666 [1:15:24<1:18:06, 208.61it/s]

finished frames 4133400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████▏     | 689037/1666666 [1:15:24<1:19:14, 205.60it/s]

finished frames 4134000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████▏     | 689124/1666666 [1:15:25<1:16:57, 211.72it/s]

finished frames 4134600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████▏     | 689234/1666666 [1:15:25<1:16:23, 213.27it/s]

finished frames 4135200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████▏     | 689344/1666666 [1:15:26<1:16:08, 213.91it/s]

finished frames 4135800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████▏     | 689432/1666666 [1:15:26<1:16:18, 213.44it/s]

finished frames 4136400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████▏     | 689542/1666666 [1:15:27<1:16:03, 214.10it/s]

finished frames 4137000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████▏     | 689630/1666666 [1:15:27<1:16:06, 213.95it/s]

finished frames 4137600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████▏     | 689740/1666666 [1:15:28<1:16:08, 213.83it/s]

finished frames 4138200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████▏     | 689828/1666666 [1:15:28<1:16:10, 213.72it/s]

finished frames 4138800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████▏     | 689938/1666666 [1:15:29<1:16:04, 214.00it/s]

finished frames 4139400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████▏     | 690026/1666666 [1:15:29<1:17:39, 209.61it/s]

finished frames 4140000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████▏     | 690136/1666666 [1:15:30<1:16:17, 213.35it/s]

finished frames 4140600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████▏     | 690224/1666666 [1:15:30<1:16:34, 212.52it/s]

finished frames 4141200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████▏     | 690334/1666666 [1:15:31<1:16:29, 212.74it/s]

finished frames 4141800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████▏     | 690444/1666666 [1:15:31<1:16:13, 213.45it/s]

finished frames 4142400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████▏     | 690532/1666666 [1:15:31<1:16:09, 213.60it/s]

finished frames 4143000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████▏     | 690642/1666666 [1:15:32<1:15:45, 214.73it/s]

finished frames 4143600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████▏     | 690730/1666666 [1:15:32<1:16:03, 213.88it/s]

finished frames 4144200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████▏     | 690840/1666666 [1:15:33<1:15:53, 214.31it/s]

finished frames 4144800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████▏     | 690928/1666666 [1:15:33<1:16:09, 213.54it/s]

finished frames 4145400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████▏     | 691038/1666666 [1:15:34<1:17:38, 209.44it/s]

finished frames 4146000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████▏     | 691125/1666666 [1:15:34<1:16:25, 212.77it/s]

finished frames 4146600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████▏     | 691235/1666666 [1:15:35<1:17:04, 210.90it/s]

finished frames 4147200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████▏     | 691322/1666666 [1:15:35<1:17:31, 209.68it/s]

finished frames 4147800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████▏     | 691432/1666666 [1:15:36<1:17:21, 210.12it/s]

finished frames 4148400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████▏     | 691540/1666666 [1:15:36<1:17:33, 209.54it/s]

finished frames 4149000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 41%|████▏     | 691627/1666666 [1:15:37<1:16:21, 212.82it/s]

finished frames 4149600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 691737/1666666 [1:15:37<1:15:49, 214.29it/s]

finished frames 4150200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 691825/1666666 [1:15:38<1:15:44, 214.53it/s]

finished frames 4150800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 691935/1666666 [1:15:38<1:15:48, 214.32it/s]

finished frames 4151400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 692023/1666666 [1:15:39<1:17:32, 209.50it/s]

finished frames 4152000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 692133/1666666 [1:15:39<1:16:06, 213.40it/s]

finished frames 4152600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 692243/1666666 [1:15:40<1:16:00, 213.65it/s]

finished frames 4153200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 692331/1666666 [1:15:40<1:15:46, 214.32it/s]

finished frames 4153800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 692441/1666666 [1:15:41<1:15:51, 214.03it/s]

finished frames 4154400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 692529/1666666 [1:15:41<1:15:37, 214.66it/s]

finished frames 4155000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 692639/1666666 [1:15:41<1:15:41, 214.48it/s]

finished frames 4155600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 692727/1666666 [1:15:42<1:15:45, 214.28it/s]

finished frames 4156200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 692837/1666666 [1:15:42<1:15:56, 213.71it/s]

finished frames 4156800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 692925/1666666 [1:15:43<1:20:30, 201.59it/s]

finished frames 4157400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 693033/1666666 [1:15:43<1:23:18, 194.77it/s]

finished frames 4158000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 693142/1666666 [1:15:44<1:17:03, 210.54it/s]

finished frames 4158600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 693230/1666666 [1:15:44<1:16:06, 213.16it/s]

finished frames 4159200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 693340/1666666 [1:15:45<1:15:50, 213.88it/s]

finished frames 4159800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 693428/1666666 [1:15:45<1:15:50, 213.89it/s]

finished frames 4160400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 693538/1666666 [1:15:46<1:15:48, 213.93it/s]

finished frames 4161000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 693626/1666666 [1:15:46<1:15:44, 214.11it/s]

finished frames 4161600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 693736/1666666 [1:15:47<1:16:17, 212.56it/s]

finished frames 4162200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 693822/1666666 [1:15:47<1:17:28, 209.30it/s]

finished frames 4162800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 693927/1666666 [1:15:48<1:17:28, 209.25it/s]

finished frames 4163400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 694034/1666666 [1:15:48<1:18:34, 206.30it/s]

finished frames 4164000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 694143/1666666 [1:15:49<1:16:16, 212.48it/s]

finished frames 4164600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 694231/1666666 [1:15:49<1:16:01, 213.20it/s]

finished frames 4165200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 694341/1666666 [1:15:50<1:16:04, 213.04it/s]

finished frames 4165800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 694429/1666666 [1:15:50<1:16:06, 212.91it/s]

finished frames 4166400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 694539/1666666 [1:15:50<1:16:03, 213.01it/s]

finished frames 4167000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 694627/1666666 [1:15:51<1:16:02, 213.04it/s]

finished frames 4167600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 694736/1666666 [1:15:51<1:17:03, 210.23it/s]

finished frames 4168200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 694824/1666666 [1:15:52<1:16:23, 212.05it/s]

finished frames 4168800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 694934/1666666 [1:15:52<1:16:07, 212.73it/s]

finished frames 4169400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 695022/1666666 [1:15:53<1:18:19, 206.76it/s]

finished frames 4170000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 695130/1666666 [1:15:53<1:16:41, 211.14it/s]

finished frames 4170600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 695241/1666666 [1:15:54<1:13:59, 218.81it/s]

finished frames 4171200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 695330/1666666 [1:15:54<1:14:23, 217.60it/s]

finished frames 4171800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 695441/1666666 [1:15:55<1:18:24, 206.44it/s]

finished frames 4172400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 695528/1666666 [1:15:55<1:16:56, 210.38it/s]

finished frames 4173000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 695642/1666666 [1:15:56<1:13:50, 219.15it/s]

finished frames 4173600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 695733/1666666 [1:15:56<1:14:52, 216.11it/s]

finished frames 4174200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 695823/1666666 [1:15:56<1:14:35, 216.94it/s]

finished frames 4174800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 695932/1666666 [1:15:57<1:18:02, 207.30it/s]

finished frames 4175400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 696039/1666666 [1:15:58<1:18:03, 207.25it/s]

finished frames 4176000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 696127/1666666 [1:15:58<1:16:05, 212.60it/s]

finished frames 4176600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 696237/1666666 [1:15:58<1:15:34, 214.01it/s]

finished frames 4177200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 696325/1666666 [1:15:59<1:18:43, 205.41it/s]

finished frames 4177800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 696434/1666666 [1:15:59<1:16:12, 212.17it/s]

finished frames 4178400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 696544/1666666 [1:16:00<1:15:45, 213.44it/s]

finished frames 4179000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 696632/1666666 [1:16:00<1:15:41, 213.57it/s]

finished frames 4179600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 696742/1666666 [1:16:01<1:15:24, 214.35it/s]

finished frames 4180200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 696830/1666666 [1:16:01<1:15:19, 214.61it/s]

finished frames 4180800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 696940/1666666 [1:16:02<1:15:23, 214.39it/s]

finished frames 4181400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 697028/1666666 [1:16:02<1:17:09, 209.46it/s]

finished frames 4182000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 697138/1666666 [1:16:03<1:15:52, 212.97it/s]

finished frames 4182600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 697226/1666666 [1:16:03<1:15:48, 213.12it/s]

finished frames 4183200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 697336/1666666 [1:16:04<1:16:11, 212.05it/s]

finished frames 4183800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 697424/1666666 [1:16:04<1:15:33, 213.80it/s]

finished frames 4184400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 697534/1666666 [1:16:05<1:15:32, 213.82it/s]

finished frames 4185000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 697644/1666666 [1:16:05<1:15:29, 213.95it/s]

finished frames 4185600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 697732/1666666 [1:16:06<1:18:27, 205.83it/s]

finished frames 4186200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 697842/1666666 [1:16:06<1:19:22, 203.43it/s]

finished frames 4186800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 697929/1666666 [1:16:06<1:16:17, 211.61it/s]

finished frames 4187400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 698039/1666666 [1:16:07<1:17:09, 209.21it/s]

finished frames 4188000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 698127/1666666 [1:16:07<1:15:52, 212.74it/s]

finished frames 4188600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 698237/1666666 [1:16:08<1:15:33, 213.60it/s]

finished frames 4189200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 698325/1666666 [1:16:08<1:15:28, 213.84it/s]

finished frames 4189800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 698435/1666666 [1:16:09<1:15:29, 213.78it/s]

finished frames 4190400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 698523/1666666 [1:16:09<1:15:36, 213.42it/s]

finished frames 4191000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 698633/1666666 [1:16:10<1:15:38, 213.29it/s]

finished frames 4191600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 698743/1666666 [1:16:10<1:15:24, 213.92it/s]

finished frames 4192200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 698831/1666666 [1:16:11<1:15:58, 212.32it/s]

finished frames 4192800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 698941/1666666 [1:16:11<1:16:22, 211.19it/s]

finished frames 4193400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 699028/1666666 [1:16:12<1:18:09, 206.35it/s]

finished frames 4194000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 699135/1666666 [1:16:12<1:17:03, 209.28it/s]

finished frames 4194600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 699242/1666666 [1:16:13<1:16:57, 209.49it/s]

finished frames 4195200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 699327/1666666 [1:16:13<1:17:17, 208.61it/s]

finished frames 4195800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 699435/1666666 [1:16:14<1:16:37, 210.36it/s]

finished frames 4196400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 699522/1666666 [1:16:14<1:16:42, 210.15it/s]

finished frames 4197000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 699632/1666666 [1:16:15<1:16:23, 210.98it/s]

finished frames 4197600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 699742/1666666 [1:16:15<1:16:21, 211.06it/s]

finished frames 4198200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 699829/1666666 [1:16:16<1:16:37, 210.30it/s]

finished frames 4198800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 699938/1666666 [1:16:16<1:16:26, 210.80it/s]

finished frames 4199400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 700024/1666666 [1:16:16<1:18:07, 206.20it/s]

finished frames 4200000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 700132/1666666 [1:16:17<1:16:36, 210.28it/s]

finished frames 4200600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 700241/1666666 [1:16:17<1:16:27, 210.64it/s]

finished frames 4201200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 700328/1666666 [1:16:18<1:16:39, 210.08it/s]

finished frames 4201800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 700437/1666666 [1:16:18<1:16:22, 210.86it/s]

finished frames 4202400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 700524/1666666 [1:16:19<1:16:11, 211.33it/s]

finished frames 4203000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 700634/1666666 [1:16:19<1:16:14, 211.17it/s]

finished frames 4203600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 700743/1666666 [1:16:20<1:16:25, 210.66it/s]

finished frames 4204200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 700831/1666666 [1:16:20<1:16:06, 211.51it/s]

finished frames 4204800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 700941/1666666 [1:16:21<1:16:01, 211.72it/s]

finished frames 4205400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 701027/1666666 [1:16:21<1:17:50, 206.77it/s]

finished frames 4206000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 701137/1666666 [1:16:22<1:15:19, 213.63it/s]

finished frames 4206600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 701225/1666666 [1:16:22<1:15:08, 214.14it/s]

finished frames 4207200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 701335/1666666 [1:16:23<1:15:11, 213.98it/s]

finished frames 4207800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 701423/1666666 [1:16:23<1:15:16, 213.70it/s]

finished frames 4208400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 701533/1666666 [1:16:24<1:14:52, 214.82it/s]

finished frames 4209000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 701643/1666666 [1:16:24<1:14:45, 215.14it/s]

finished frames 4209600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 701731/1666666 [1:16:25<1:15:15, 213.68it/s]

finished frames 4210200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 701841/1666666 [1:16:25<1:15:22, 213.33it/s]

finished frames 4210800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 701929/1666666 [1:16:25<1:15:15, 213.63it/s]

finished frames 4211400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 702039/1666666 [1:16:26<1:16:53, 209.11it/s]

finished frames 4212000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 702126/1666666 [1:16:26<1:15:43, 212.30it/s]

finished frames 4212600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 702236/1666666 [1:16:27<1:15:15, 213.56it/s]

finished frames 4213200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 702324/1666666 [1:16:27<1:15:25, 213.10it/s]

finished frames 4213800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 702434/1666666 [1:16:28<1:19:47, 201.41it/s]

finished frames 4214400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 702521/1666666 [1:16:28<1:16:17, 210.61it/s]

finished frames 4215000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 702630/1666666 [1:16:29<1:17:09, 208.23it/s]

finished frames 4215600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 702740/1666666 [1:16:29<1:15:33, 212.61it/s]

finished frames 4216200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 702828/1666666 [1:16:30<1:15:20, 213.23it/s]

finished frames 4216800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 702938/1666666 [1:16:30<1:15:15, 213.44it/s]

finished frames 4217400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 703026/1666666 [1:16:31<1:16:35, 209.69it/s]

finished frames 4218000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 703136/1666666 [1:16:31<1:16:02, 211.20it/s]

finished frames 4218600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 703224/1666666 [1:16:32<1:15:35, 212.44it/s]

finished frames 4219200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 703334/1666666 [1:16:32<1:15:24, 212.93it/s]

finished frames 4219800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 703422/1666666 [1:16:33<1:15:26, 212.82it/s]

finished frames 4220400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 703532/1666666 [1:16:33<1:15:22, 212.99it/s]

finished frames 4221000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 703642/1666666 [1:16:34<1:15:11, 213.46it/s]

finished frames 4221600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 703730/1666666 [1:16:34<1:15:25, 212.77it/s]

finished frames 4222200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 703840/1666666 [1:16:35<1:15:11, 213.40it/s]

finished frames 4222800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 703928/1666666 [1:16:35<1:15:12, 213.34it/s]

finished frames 4223400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 704038/1666666 [1:16:35<1:16:49, 208.85it/s]

finished frames 4224000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 704126/1666666 [1:16:36<1:15:23, 212.77it/s]

finished frames 4224600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 704236/1666666 [1:16:36<1:14:49, 214.37it/s]

finished frames 4225200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 704324/1666666 [1:16:37<1:14:46, 214.48it/s]

finished frames 4225800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 704434/1666666 [1:16:37<1:14:39, 214.82it/s]

finished frames 4226400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 704544/1666666 [1:16:38<1:14:30, 215.19it/s]

finished frames 4227000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 704632/1666666 [1:16:38<1:15:39, 211.92it/s]

finished frames 4227600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 704741/1666666 [1:16:39<1:16:04, 210.74it/s]

finished frames 4228200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 704827/1666666 [1:16:39<1:16:41, 209.05it/s]

finished frames 4228800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 704934/1666666 [1:16:40<1:16:38, 209.15it/s]

finished frames 4229400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 705039/1666666 [1:16:40<1:18:21, 204.53it/s]

finished frames 4230000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 705123/1666666 [1:16:41<1:17:14, 207.48it/s]

finished frames 4230600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 705228/1666666 [1:16:41<1:16:41, 208.93it/s]

finished frames 4231200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 705333/1666666 [1:16:42<1:16:41, 208.92it/s]

finished frames 4231800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 705439/1666666 [1:16:42<1:16:39, 209.01it/s]

finished frames 4232400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 705523/1666666 [1:16:43<1:16:40, 208.92it/s]

finished frames 4233000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 705629/1666666 [1:16:43<1:16:39, 208.96it/s]

finished frames 4233600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 705734/1666666 [1:16:44<1:16:52, 208.35it/s]

finished frames 4234200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 705839/1666666 [1:16:44<1:16:38, 208.95it/s]

finished frames 4234800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 705925/1666666 [1:16:44<1:16:28, 209.40it/s]

finished frames 4235400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 706030/1666666 [1:16:45<1:18:32, 203.86it/s]

finished frames 4236000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 706137/1666666 [1:16:45<1:16:47, 208.48it/s]

finished frames 4236600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 706242/1666666 [1:16:46<1:16:44, 208.61it/s]

finished frames 4237200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 706326/1666666 [1:16:46<1:16:45, 208.53it/s]

finished frames 4237800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 706432/1666666 [1:16:47<1:16:38, 208.80it/s]

finished frames 4238400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 706539/1666666 [1:16:47<1:16:26, 209.35it/s]

finished frames 4239000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 706623/1666666 [1:16:48<1:16:37, 208.80it/s]

finished frames 4239600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 706728/1666666 [1:16:48<1:16:43, 208.52it/s]

finished frames 4240200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 706834/1666666 [1:16:49<1:16:31, 209.03it/s]

finished frames 4240800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 706940/1666666 [1:16:49<1:16:27, 209.20it/s]

finished frames 4241400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 707024/1666666 [1:16:50<1:18:20, 204.17it/s]

finished frames 4242000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 707130/1666666 [1:16:50<1:16:39, 208.60it/s]

finished frames 4242600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 707238/1666666 [1:16:51<1:16:20, 209.47it/s]

finished frames 4243200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 707322/1666666 [1:16:51<1:19:58, 199.91it/s]

finished frames 4243800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 707429/1666666 [1:16:52<1:17:02, 207.50it/s]

finished frames 4244400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 707535/1666666 [1:16:52<1:16:30, 208.95it/s]

finished frames 4245000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 707642/1666666 [1:16:53<1:16:21, 209.32it/s]

finished frames 4245600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 707727/1666666 [1:16:53<1:16:26, 209.07it/s]

finished frames 4246200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 707833/1666666 [1:16:54<1:16:18, 209.41it/s]

finished frames 4246800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 707940/1666666 [1:16:54<1:16:19, 209.37it/s]

finished frames 4247400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 708024/1666666 [1:16:55<1:18:11, 204.35it/s]

finished frames 4248000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 708131/1666666 [1:16:55<1:16:48, 208.01it/s]

finished frames 4248600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 42%|████▏     | 708237/1666666 [1:16:56<1:16:26, 208.95it/s]

finished frames 4249200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 708343/1666666 [1:16:56<1:16:16, 209.42it/s]

finished frames 4249800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 708429/1666666 [1:16:57<1:16:07, 209.79it/s]

finished frames 4250400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 708535/1666666 [1:16:57<1:16:19, 209.24it/s]

finished frames 4251000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 708641/1666666 [1:16:58<1:16:14, 209.44it/s]

finished frames 4251600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 708727/1666666 [1:16:58<1:16:16, 209.32it/s]

finished frames 4252200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 708834/1666666 [1:16:58<1:16:17, 209.25it/s]

finished frames 4252800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 708941/1666666 [1:16:59<1:16:15, 209.33it/s]

finished frames 4253400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 709025/1666666 [1:16:59<1:17:59, 204.65it/s]

finished frames 4254000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 709131/1666666 [1:17:00<1:16:47, 207.82it/s]

finished frames 4254600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 709236/1666666 [1:17:00<1:16:47, 207.79it/s]

finished frames 4255200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 709341/1666666 [1:17:01<1:16:50, 207.63it/s]

finished frames 4255800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 709425/1666666 [1:17:01<1:16:55, 207.40it/s]

finished frames 4256400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 709530/1666666 [1:17:02<1:20:16, 198.71it/s]

finished frames 4257000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 709635/1666666 [1:17:02<1:20:44, 197.54it/s]

finished frames 4257600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 709742/1666666 [1:17:03<1:16:51, 207.49it/s]

finished frames 4258200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 709826/1666666 [1:17:03<1:16:35, 208.19it/s]

finished frames 4258800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 709934/1666666 [1:17:04<1:15:42, 210.63it/s]

finished frames 4259400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 710020/1666666 [1:17:04<1:20:19, 198.48it/s]

finished frames 4260000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 710127/1666666 [1:17:05<1:16:40, 207.92it/s]

finished frames 4260600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 710234/1666666 [1:17:05<1:16:01, 209.70it/s]

finished frames 4261200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 710340/1666666 [1:17:06<1:16:05, 209.46it/s]

finished frames 4261800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 710425/1666666 [1:17:06<1:15:56, 209.84it/s]

finished frames 4262400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 710534/1666666 [1:17:07<1:16:01, 209.62it/s]

finished frames 4263000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 710641/1666666 [1:17:07<1:15:54, 209.90it/s]

finished frames 4263600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 710727/1666666 [1:17:08<1:15:57, 209.74it/s]

finished frames 4264200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 710835/1666666 [1:17:08<1:15:38, 210.60it/s]

finished frames 4264800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 710923/1666666 [1:17:09<1:15:08, 212.01it/s]

finished frames 4265400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 711033/1666666 [1:17:09<1:16:35, 207.95it/s]

finished frames 4266000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 711142/1666666 [1:17:10<1:15:14, 211.68it/s]

finished frames 4266600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 711230/1666666 [1:17:10<1:14:46, 212.95it/s]

finished frames 4267200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 711340/1666666 [1:17:11<1:14:26, 213.91it/s]

finished frames 4267800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 711428/1666666 [1:17:11<1:14:41, 213.15it/s]

finished frames 4268400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 711538/1666666 [1:17:11<1:14:41, 213.15it/s]

finished frames 4269000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 711626/1666666 [1:17:12<1:14:37, 213.29it/s]

finished frames 4269600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 711736/1666666 [1:17:12<1:14:39, 213.18it/s]

finished frames 4270200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 711824/1666666 [1:17:13<1:14:54, 212.46it/s]

finished frames 4270800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 711933/1666666 [1:17:13<1:16:01, 209.29it/s]

finished frames 4271400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 712020/1666666 [1:17:14<1:20:30, 197.62it/s]

finished frames 4272000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 712129/1666666 [1:17:14<1:15:33, 210.56it/s]

finished frames 4272600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 712239/1666666 [1:17:15<1:14:28, 213.59it/s]

finished frames 4273200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 712327/1666666 [1:17:15<1:14:09, 214.50it/s]

finished frames 4273800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 712437/1666666 [1:17:16<1:14:22, 213.84it/s]

finished frames 4274400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 712525/1666666 [1:17:16<1:14:23, 213.75it/s]

finished frames 4275000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 712635/1666666 [1:17:17<1:14:14, 214.16it/s]

finished frames 4275600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 712723/1666666 [1:17:17<1:14:11, 214.29it/s]

finished frames 4276200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 712833/1666666 [1:17:18<1:14:18, 213.93it/s]

finished frames 4276800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 712943/1666666 [1:17:18<1:14:28, 213.44it/s]

finished frames 4277400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 713031/1666666 [1:17:19<1:16:17, 208.35it/s]

finished frames 4278000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 713141/1666666 [1:17:19<1:14:41, 212.78it/s]

finished frames 4278600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 713229/1666666 [1:17:19<1:14:31, 213.25it/s]

finished frames 4279200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 713339/1666666 [1:17:20<1:14:25, 213.49it/s]

finished frames 4279800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 713427/1666666 [1:17:20<1:14:22, 213.63it/s]

finished frames 4280400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 713537/1666666 [1:17:21<1:14:24, 213.51it/s]

finished frames 4281000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 713625/1666666 [1:17:21<1:14:22, 213.55it/s]

finished frames 4281600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 713735/1666666 [1:17:22<1:14:22, 213.54it/s]

finished frames 4282200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 713823/1666666 [1:17:22<1:14:32, 213.07it/s]

finished frames 4282800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 713933/1666666 [1:17:23<1:14:17, 213.74it/s]

finished frames 4283400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 714021/1666666 [1:17:23<1:16:24, 207.81it/s]

finished frames 4284000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 714130/1666666 [1:17:24<1:14:47, 212.28it/s]

finished frames 4284600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 714218/1666666 [1:17:24<1:24:17, 188.33it/s]

finished frames 4285200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 714327/1666666 [1:17:25<1:15:44, 209.57it/s]

finished frames 4285800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 714437/1666666 [1:17:25<1:15:48, 209.37it/s]

finished frames 4286400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 714525/1666666 [1:17:26<1:14:56, 211.77it/s]

finished frames 4287000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 714635/1666666 [1:17:26<1:15:00, 211.55it/s]

finished frames 4287600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 714723/1666666 [1:17:27<1:15:13, 210.91it/s]

finished frames 4288200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 714833/1666666 [1:17:27<1:14:48, 212.05it/s]

finished frames 4288800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 714943/1666666 [1:17:28<1:14:17, 213.49it/s]

finished frames 4289400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 715031/1666666 [1:17:28<1:15:53, 209.00it/s]

finished frames 4290000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 715141/1666666 [1:17:29<1:14:24, 213.13it/s]

finished frames 4290600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 715229/1666666 [1:17:29<1:14:17, 213.46it/s]

finished frames 4291200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 715339/1666666 [1:17:30<1:14:14, 213.57it/s]

finished frames 4291800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 715427/1666666 [1:17:30<1:14:12, 213.64it/s]

finished frames 4292400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 715537/1666666 [1:17:30<1:14:10, 213.70it/s]

finished frames 4293000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 715625/1666666 [1:17:31<1:14:09, 213.76it/s]

finished frames 4293600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 715735/1666666 [1:17:31<1:14:03, 214.01it/s]

finished frames 4294200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 715823/1666666 [1:17:32<1:14:13, 213.50it/s]

finished frames 4294800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 715933/1666666 [1:17:32<1:14:09, 213.68it/s]

finished frames 4295400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 716021/1666666 [1:17:33<1:16:22, 207.44it/s]

finished frames 4296000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 716130/1666666 [1:17:33<1:14:28, 212.71it/s]

finished frames 4296600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 716240/1666666 [1:17:34<1:14:22, 212.96it/s]

finished frames 4297200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 716328/1666666 [1:17:34<1:14:09, 213.57it/s]

finished frames 4297800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 716438/1666666 [1:17:35<1:14:13, 213.38it/s]

finished frames 4298400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 716526/1666666 [1:17:35<1:14:19, 213.07it/s]

finished frames 4299000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 716636/1666666 [1:17:36<1:18:32, 201.58it/s]

finished frames 4299600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 716722/1666666 [1:17:36<1:28:59, 177.91it/s]

finished frames 4300200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 716832/1666666 [1:17:37<1:16:20, 207.35it/s]

finished frames 4300800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 716942/1666666 [1:17:37<1:14:28, 212.51it/s]

finished frames 4301400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 717030/1666666 [1:17:38<1:15:48, 208.77it/s]

finished frames 4302000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 717140/1666666 [1:17:38<1:14:24, 212.67it/s]

finished frames 4302600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 717228/1666666 [1:17:38<1:14:06, 213.54it/s]

finished frames 4303200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 717338/1666666 [1:17:39<1:14:03, 213.65it/s]

finished frames 4303800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 717426/1666666 [1:17:39<1:13:51, 214.18it/s]

finished frames 4304400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 717536/1666666 [1:17:40<1:13:52, 214.11it/s]

finished frames 4305000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 717624/1666666 [1:17:40<1:14:05, 213.51it/s]

finished frames 4305600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 717734/1666666 [1:17:41<1:13:54, 213.97it/s]

finished frames 4306200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 717844/1666666 [1:17:41<1:14:08, 213.28it/s]

finished frames 4306800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 717932/1666666 [1:17:42<1:14:05, 213.42it/s]

finished frames 4307400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 718020/1666666 [1:17:42<1:16:24, 206.94it/s]

finished frames 4308000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 718129/1666666 [1:17:43<1:14:40, 211.71it/s]

finished frames 4308600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 718239/1666666 [1:17:43<1:14:27, 212.28it/s]

finished frames 4309200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 718327/1666666 [1:17:44<1:14:07, 213.21it/s]

finished frames 4309800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 718437/1666666 [1:17:44<1:14:06, 213.24it/s]

finished frames 4310400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 718525/1666666 [1:17:45<1:14:09, 213.09it/s]

finished frames 4311000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 718635/1666666 [1:17:45<1:14:22, 212.43it/s]

finished frames 4311600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 718723/1666666 [1:17:46<1:13:53, 213.83it/s]

finished frames 4312200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 718833/1666666 [1:17:46<1:13:25, 215.15it/s]

finished frames 4312800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 718943/1666666 [1:17:47<1:13:44, 214.22it/s]

finished frames 4313400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 719029/1666666 [1:17:47<1:18:25, 201.37it/s]

finished frames 4314000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 719117/1666666 [1:17:47<1:21:08, 194.62it/s]

finished frames 4314600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 719226/1666666 [1:17:48<1:14:57, 210.65it/s]

finished frames 4315200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 719336/1666666 [1:17:48<1:14:15, 212.62it/s]

finished frames 4315800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 719424/1666666 [1:17:49<1:13:44, 214.11it/s]

finished frames 4316400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 719534/1666666 [1:17:49<1:13:33, 214.60it/s]

finished frames 4317000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 719644/1666666 [1:17:50<1:13:18, 215.32it/s]

finished frames 4317600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 719732/1666666 [1:17:50<1:13:32, 214.61it/s]

finished frames 4318200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 719842/1666666 [1:17:51<1:13:18, 215.28it/s]

finished frames 4318800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 719930/1666666 [1:17:51<1:13:15, 215.38it/s]

finished frames 4319400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 720040/1666666 [1:17:52<1:15:09, 209.94it/s]

finished frames 4320000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 720128/1666666 [1:17:52<1:13:47, 213.81it/s]

finished frames 4320600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 720238/1666666 [1:17:53<1:13:14, 215.35it/s]

finished frames 4321200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 720326/1666666 [1:17:53<1:13:38, 214.15it/s]

finished frames 4321800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 720437/1666666 [1:17:54<1:12:23, 217.87it/s]

finished frames 4322400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 720525/1666666 [1:17:54<1:13:05, 215.74it/s]

finished frames 4323000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 720636/1666666 [1:17:55<1:13:27, 214.66it/s]

finished frames 4323600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 720724/1666666 [1:17:55<1:13:55, 213.26it/s]

finished frames 4324200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 720836/1666666 [1:17:55<1:11:59, 218.97it/s]

finished frames 4324800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 720928/1666666 [1:17:56<1:11:04, 221.79it/s]

finished frames 4325400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 721039/1666666 [1:17:56<1:13:34, 214.22it/s]

finished frames 4326000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 721127/1666666 [1:17:57<1:13:43, 213.77it/s]

finished frames 4326600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 721236/1666666 [1:17:57<1:15:23, 209.02it/s]

finished frames 4327200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 721324/1666666 [1:17:58<1:13:59, 212.94it/s]

finished frames 4327800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 721434/1666666 [1:17:58<1:16:50, 205.00it/s]

finished frames 4328400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 721543/1666666 [1:17:59<1:14:22, 211.78it/s]

finished frames 4329000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 721630/1666666 [1:17:59<1:15:40, 208.15it/s]

finished frames 4329600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 721739/1666666 [1:18:00<1:14:10, 212.34it/s]

finished frames 4330200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 721827/1666666 [1:18:00<1:14:01, 212.73it/s]

finished frames 4330800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 721937/1666666 [1:18:01<1:13:55, 212.99it/s]

finished frames 4331400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 722024/1666666 [1:18:01<1:16:51, 204.86it/s]

finished frames 4332000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 722129/1666666 [1:18:02<1:15:52, 207.50it/s]

finished frames 4332600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 722235/1666666 [1:18:02<1:15:25, 208.69it/s]

finished frames 4333200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 722343/1666666 [1:18:03<1:14:43, 210.63it/s]

finished frames 4333800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 722431/1666666 [1:18:03<1:13:43, 213.46it/s]

finished frames 4334400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 722541/1666666 [1:18:04<1:13:31, 214.03it/s]

finished frames 4335000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 722629/1666666 [1:18:04<1:13:39, 213.61it/s]

finished frames 4335600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 722739/1666666 [1:18:04<1:13:29, 214.05it/s]

finished frames 4336200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 722827/1666666 [1:18:05<1:13:35, 213.76it/s]

finished frames 4336800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 722937/1666666 [1:18:05<1:13:43, 213.34it/s]

finished frames 4337400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 723025/1666666 [1:18:06<1:15:14, 209.04it/s]

finished frames 4338000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 723135/1666666 [1:18:06<1:13:57, 212.65it/s]

finished frames 4338600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 723223/1666666 [1:18:07<1:13:50, 212.95it/s]

finished frames 4339200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 723333/1666666 [1:18:07<1:13:44, 213.23it/s]

finished frames 4339800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 723443/1666666 [1:18:08<1:13:47, 213.02it/s]

finished frames 4340400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 723531/1666666 [1:18:08<1:13:36, 213.53it/s]

finished frames 4341000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 723641/1666666 [1:18:09<1:13:43, 213.20it/s]

finished frames 4341600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 723729/1666666 [1:18:09<1:13:41, 213.24it/s]

finished frames 4342200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 723839/1666666 [1:18:10<1:18:35, 199.96it/s]

finished frames 4342800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 723925/1666666 [1:18:10<1:18:13, 200.84it/s]

finished frames 4343400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 724033/1666666 [1:18:11<1:16:22, 205.69it/s]

finished frames 4344000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 724142/1666666 [1:18:11<1:14:05, 212.00it/s]

finished frames 4344600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 724230/1666666 [1:18:12<1:13:41, 213.17it/s]

finished frames 4345200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 724340/1666666 [1:18:12<1:13:41, 213.14it/s]

finished frames 4345800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 724428/1666666 [1:18:13<1:13:42, 213.05it/s]

finished frames 4346400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 724538/1666666 [1:18:13<1:13:51, 212.59it/s]

finished frames 4347000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 724626/1666666 [1:18:13<1:13:52, 212.53it/s]

finished frames 4347600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 724736/1666666 [1:18:14<1:13:49, 212.64it/s]

finished frames 4348200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 724824/1666666 [1:18:14<1:13:44, 212.89it/s]

finished frames 4348800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 43%|████▎     | 724934/1666666 [1:18:15<1:13:43, 212.89it/s]

finished frames 4349400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▎     | 725022/1666666 [1:18:15<1:16:02, 206.37it/s]

finished frames 4350000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▎     | 725131/1666666 [1:18:16<1:14:13, 211.42it/s]

finished frames 4350600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▎     | 725241/1666666 [1:18:16<1:13:42, 212.85it/s]

finished frames 4351200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▎     | 725329/1666666 [1:18:17<1:13:38, 213.05it/s]

finished frames 4351800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▎     | 725439/1666666 [1:18:17<1:13:27, 213.55it/s]

finished frames 4352400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▎     | 725527/1666666 [1:18:18<1:13:44, 212.70it/s]

finished frames 4353000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▎     | 725637/1666666 [1:18:18<1:13:35, 213.12it/s]

finished frames 4353600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▎     | 725725/1666666 [1:18:19<1:13:36, 213.05it/s]

finished frames 4354200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▎     | 725835/1666666 [1:18:19<1:13:37, 213.00it/s]

finished frames 4354800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▎     | 725923/1666666 [1:18:20<1:13:28, 213.37it/s]

finished frames 4355400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▎     | 726033/1666666 [1:18:20<1:15:13, 208.42it/s]

finished frames 4356000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▎     | 726121/1666666 [1:18:21<1:14:04, 211.60it/s]

finished frames 4356600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▎     | 726230/1666666 [1:18:21<1:15:10, 208.49it/s]

finished frames 4357200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▎     | 726340/1666666 [1:18:22<1:16:02, 206.08it/s]

finished frames 4357800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▎     | 726428/1666666 [1:18:22<1:14:01, 211.68it/s]

finished frames 4358400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▎     | 726538/1666666 [1:18:23<1:13:28, 213.28it/s]

finished frames 4359000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▎     | 726626/1666666 [1:18:23<1:13:28, 213.21it/s]

finished frames 4359600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▎     | 726736/1666666 [1:18:23<1:13:21, 213.54it/s]

finished frames 4360200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▎     | 726824/1666666 [1:18:24<1:13:20, 213.59it/s]

finished frames 4360800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▎     | 726934/1666666 [1:18:24<1:13:06, 214.22it/s]

finished frames 4361400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▎     | 727022/1666666 [1:18:25<1:15:41, 206.92it/s]

finished frames 4362000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▎     | 727130/1666666 [1:18:25<1:14:40, 209.67it/s]

finished frames 4362600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▎     | 727237/1666666 [1:18:26<1:14:22, 210.54it/s]

finished frames 4363200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▎     | 727324/1666666 [1:18:26<1:14:17, 210.73it/s]

finished frames 4363800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▎     | 727434/1666666 [1:18:27<1:13:24, 213.26it/s]

finished frames 4364400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▎     | 727544/1666666 [1:18:27<1:12:44, 215.16it/s]

finished frames 4365000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▎     | 727632/1666666 [1:18:28<1:12:39, 215.42it/s]

finished frames 4365600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▎     | 727742/1666666 [1:18:28<1:12:16, 216.49it/s]

finished frames 4366200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▎     | 727830/1666666 [1:18:29<1:12:23, 216.16it/s]

finished frames 4366800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▎     | 727940/1666666 [1:18:29<1:12:17, 216.41it/s]

finished frames 4367400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▎     | 728028/1666666 [1:18:30<1:14:03, 211.21it/s]

finished frames 4368000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▎     | 728138/1666666 [1:18:30<1:12:37, 215.39it/s]

finished frames 4368600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▎     | 728226/1666666 [1:18:30<1:12:26, 215.91it/s]

finished frames 4369200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▎     | 728336/1666666 [1:18:31<1:12:27, 215.83it/s]

finished frames 4369800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▎     | 728424/1666666 [1:18:31<1:12:38, 215.27it/s]

finished frames 4370400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▎     | 728512/1666666 [1:18:32<1:12:36, 215.33it/s]

finished frames 4371000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▎     | 728622/1666666 [1:18:32<1:15:33, 206.92it/s]

finished frames 4371600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▎     | 728731/1666666 [1:18:33<1:13:31, 212.62it/s]

finished frames 4372200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▎     | 728841/1666666 [1:18:33<1:12:24, 215.88it/s]

finished frames 4372800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▎     | 728929/1666666 [1:18:34<1:12:24, 215.82it/s]

finished frames 4373400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▎     | 729039/1666666 [1:18:34<1:13:59, 211.19it/s]

finished frames 4374000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▎     | 729127/1666666 [1:18:35<1:12:40, 215.00it/s]

finished frames 4374600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 729237/1666666 [1:18:35<1:12:20, 215.95it/s]

finished frames 4375200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 729325/1666666 [1:18:36<1:12:50, 214.49it/s]

finished frames 4375800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 729435/1666666 [1:18:36<1:12:54, 214.26it/s]

finished frames 4376400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 729523/1666666 [1:18:37<1:12:53, 214.29it/s]

finished frames 4377000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 729633/1666666 [1:18:37<1:13:03, 213.78it/s]

finished frames 4377600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 729743/1666666 [1:18:38<1:12:54, 214.16it/s]

finished frames 4378200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 729831/1666666 [1:18:38<1:12:42, 214.77it/s]

finished frames 4378800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 729941/1666666 [1:18:39<1:12:51, 214.26it/s]

finished frames 4379400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 730029/1666666 [1:18:39<1:14:26, 209.68it/s]

finished frames 4380000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 730139/1666666 [1:18:39<1:13:22, 212.72it/s]

finished frames 4380600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 730227/1666666 [1:18:40<1:13:07, 213.46it/s]

finished frames 4381200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 730337/1666666 [1:18:40<1:12:43, 214.58it/s]

finished frames 4381800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 730425/1666666 [1:18:41<1:12:52, 214.12it/s]

finished frames 4382400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 730535/1666666 [1:18:41<1:12:55, 213.96it/s]

finished frames 4383000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 730623/1666666 [1:18:42<1:13:00, 213.71it/s]

finished frames 4383600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 730733/1666666 [1:18:42<1:12:46, 214.36it/s]

finished frames 4384200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 730843/1666666 [1:18:43<1:12:45, 214.36it/s]

finished frames 4384800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 730931/1666666 [1:18:43<1:19:48, 195.40it/s]

finished frames 4385400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 731040/1666666 [1:18:44<1:15:48, 205.69it/s]

finished frames 4386000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 731127/1666666 [1:18:44<1:13:51, 211.12it/s]

finished frames 4386600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 731237/1666666 [1:18:45<1:13:36, 211.82it/s]

finished frames 4387200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 731325/1666666 [1:18:45<1:13:41, 211.53it/s]

finished frames 4387800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 731435/1666666 [1:18:46<1:13:43, 211.41it/s]

finished frames 4388400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 731542/1666666 [1:18:46<1:14:33, 209.02it/s]

finished frames 4389000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 731626/1666666 [1:18:47<1:14:48, 208.34it/s]

finished frames 4389600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 731734/1666666 [1:18:47<1:14:06, 210.24it/s]

finished frames 4390200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 731843/1666666 [1:18:48<1:13:49, 211.03it/s]

finished frames 4390800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 731928/1666666 [1:18:48<1:14:19, 209.59it/s]

finished frames 4391400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 732034/1666666 [1:18:48<1:15:53, 205.25it/s]

finished frames 4392000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 732141/1666666 [1:18:49<1:14:30, 209.02it/s]

finished frames 4392600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 732227/1666666 [1:18:49<1:14:15, 209.75it/s]

finished frames 4393200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 732334/1666666 [1:18:50<1:14:14, 209.77it/s]

finished frames 4393800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 732439/1666666 [1:18:50<1:14:22, 209.35it/s]

finished frames 4394400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 732523/1666666 [1:18:51<1:14:35, 208.72it/s]

finished frames 4395000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 732629/1666666 [1:18:51<1:14:33, 208.79it/s]

finished frames 4395600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 732736/1666666 [1:18:52<1:14:09, 209.88it/s]

finished frames 4396200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 732822/1666666 [1:18:52<1:14:18, 209.47it/s]

finished frames 4396800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 732931/1666666 [1:18:53<1:13:19, 212.21it/s]

finished frames 4397400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 733041/1666666 [1:18:53<1:14:36, 208.55it/s]

finished frames 4398000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 733128/1666666 [1:18:54<1:13:31, 211.59it/s]

finished frames 4398600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 733238/1666666 [1:18:54<1:13:14, 212.39it/s]

finished frames 4399200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 733326/1666666 [1:18:55<1:16:07, 204.34it/s]

finished frames 4399800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 733433/1666666 [1:18:55<1:21:15, 191.41it/s]

finished frames 4400400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 733543/1666666 [1:18:56<1:14:10, 209.66it/s]

finished frames 4401000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 733631/1666666 [1:18:56<1:13:09, 212.58it/s]

finished frames 4401600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 733741/1666666 [1:18:57<1:12:53, 213.29it/s]

finished frames 4402200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 733829/1666666 [1:18:57<1:12:45, 213.68it/s]

finished frames 4402800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 733939/1666666 [1:18:58<1:12:30, 214.42it/s]

finished frames 4403400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 734027/1666666 [1:18:58<1:14:15, 209.33it/s]

finished frames 4404000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 734137/1666666 [1:18:59<1:12:52, 213.25it/s]

finished frames 4404600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 734225/1666666 [1:18:59<1:12:48, 213.44it/s]

finished frames 4405200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 734335/1666666 [1:18:59<1:12:39, 213.86it/s]

finished frames 4405800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 734423/1666666 [1:19:00<1:12:53, 213.16it/s]

finished frames 4406400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 734533/1666666 [1:19:00<1:12:36, 213.95it/s]

finished frames 4407000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 734643/1666666 [1:19:01<1:12:25, 214.50it/s]

finished frames 4407600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 734731/1666666 [1:19:01<1:12:38, 213.81it/s]

finished frames 4408200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 734841/1666666 [1:19:02<1:12:33, 214.03it/s]

finished frames 4408800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 734929/1666666 [1:19:02<1:12:43, 213.54it/s]

finished frames 4409400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 735039/1666666 [1:19:03<1:14:29, 208.43it/s]

finished frames 4410000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 735126/1666666 [1:19:03<1:13:11, 212.14it/s]

finished frames 4410600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 735236/1666666 [1:19:04<1:12:56, 212.85it/s]

finished frames 4411200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 735324/1666666 [1:19:04<1:13:35, 210.91it/s]

finished frames 4411800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 735431/1666666 [1:19:05<1:14:24, 208.58it/s]

finished frames 4412400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 735536/1666666 [1:19:05<1:14:45, 207.59it/s]

finished frames 4413000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 735642/1666666 [1:19:06<1:14:23, 208.57it/s]

finished frames 4413600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 735726/1666666 [1:19:06<1:16:56, 201.64it/s]

finished frames 4414200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 735831/1666666 [1:19:07<1:19:36, 194.86it/s]

finished frames 4414800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 735937/1666666 [1:19:07<1:15:10, 206.34it/s]

finished frames 4415400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 736023/1666666 [1:19:08<1:15:16, 206.07it/s]

finished frames 4416000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 736132/1666666 [1:19:08<1:13:07, 212.10it/s]

finished frames 4416600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 736242/1666666 [1:19:09<1:12:43, 213.25it/s]

finished frames 4417200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 736330/1666666 [1:19:09<1:12:30, 213.84it/s]

finished frames 4417800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 736440/1666666 [1:19:10<1:12:28, 213.91it/s]

finished frames 4418400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 736528/1666666 [1:19:10<1:12:33, 213.64it/s]

finished frames 4419000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 736638/1666666 [1:19:10<1:12:39, 213.34it/s]

finished frames 4419600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 736726/1666666 [1:19:11<1:12:10, 214.76it/s]

finished frames 4420200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 736836/1666666 [1:19:11<1:12:23, 214.06it/s]

finished frames 4420800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 736924/1666666 [1:19:12<1:12:32, 213.62it/s]

finished frames 4421400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 737034/1666666 [1:19:12<1:14:22, 208.31it/s]

finished frames 4422000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 737143/1666666 [1:19:13<1:12:47, 212.85it/s]

finished frames 4422600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 737231/1666666 [1:19:13<1:12:38, 213.26it/s]

finished frames 4423200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 737341/1666666 [1:19:14<1:12:21, 214.06it/s]

finished frames 4423800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 737429/1666666 [1:19:14<1:12:15, 214.35it/s]

finished frames 4424400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 737539/1666666 [1:19:15<1:12:09, 214.62it/s]

finished frames 4425000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 737627/1666666 [1:19:15<1:12:10, 214.51it/s]

finished frames 4425600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 737737/1666666 [1:19:16<1:12:00, 214.99it/s]

finished frames 4426200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 737825/1666666 [1:19:16<1:11:57, 215.15it/s]

finished frames 4426800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 737935/1666666 [1:19:17<1:11:56, 215.13it/s]

finished frames 4427400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 738023/1666666 [1:19:17<1:13:23, 210.90it/s]

finished frames 4428000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 738133/1666666 [1:19:18<1:12:16, 214.11it/s]

finished frames 4428600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 738243/1666666 [1:19:18<1:15:02, 206.19it/s]

finished frames 4429200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 738329/1666666 [1:19:18<1:14:02, 208.99it/s]

finished frames 4429800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 738436/1666666 [1:19:19<1:13:47, 209.67it/s]

finished frames 4430400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 738522/1666666 [1:19:19<1:13:41, 209.90it/s]

finished frames 4431000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 738632/1666666 [1:19:20<1:12:34, 213.12it/s]

finished frames 4431600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 738742/1666666 [1:19:20<1:13:19, 210.92it/s]

finished frames 4432200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 738830/1666666 [1:19:21<1:13:01, 211.74it/s]

finished frames 4432800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 738940/1666666 [1:19:21<1:12:28, 213.35it/s]

finished frames 4433400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 739028/1666666 [1:19:22<1:13:36, 210.05it/s]

finished frames 4434000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 739138/1666666 [1:19:22<1:12:22, 213.58it/s]

finished frames 4434600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 739226/1666666 [1:19:23<1:12:18, 213.79it/s]

finished frames 4435200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 739336/1666666 [1:19:23<1:11:59, 214.71it/s]

finished frames 4435800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 739424/1666666 [1:19:24<1:12:13, 213.98it/s]

finished frames 4436400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 739534/1666666 [1:19:24<1:12:19, 213.67it/s]

finished frames 4437000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 739644/1666666 [1:19:25<1:12:22, 213.48it/s]

finished frames 4437600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 739732/1666666 [1:19:25<1:12:02, 214.43it/s]

finished frames 4438200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 739842/1666666 [1:19:26<1:11:58, 214.60it/s]

finished frames 4438800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 739930/1666666 [1:19:26<1:12:00, 214.52it/s]

finished frames 4439400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 740040/1666666 [1:19:27<1:13:29, 210.16it/s]

finished frames 4440000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 740128/1666666 [1:19:27<1:12:23, 213.30it/s]

finished frames 4440600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 740238/1666666 [1:19:27<1:12:23, 213.31it/s]

finished frames 4441200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 740326/1666666 [1:19:28<1:12:16, 213.64it/s]

finished frames 4441800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 740436/1666666 [1:19:28<1:12:08, 213.97it/s]

finished frames 4442400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 740524/1666666 [1:19:29<1:12:26, 213.08it/s]

finished frames 4443000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 740633/1666666 [1:19:29<1:13:54, 208.83it/s]

finished frames 4443600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 740743/1666666 [1:19:30<1:12:26, 213.05it/s]

finished frames 4444200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 740831/1666666 [1:19:30<1:12:09, 213.84it/s]

finished frames 4444800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 740941/1666666 [1:19:31<1:12:00, 214.24it/s]

finished frames 4445400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 741029/1666666 [1:19:31<1:13:29, 209.91it/s]

finished frames 4446000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 741139/1666666 [1:19:32<1:12:25, 212.97it/s]

finished frames 4446600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 741227/1666666 [1:19:32<1:12:07, 213.87it/s]

finished frames 4447200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 741337/1666666 [1:19:33<1:11:50, 214.65it/s]

finished frames 4447800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 741425/1666666 [1:19:33<1:11:55, 214.40it/s]

finished frames 4448400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 741535/1666666 [1:19:34<1:11:43, 214.97it/s]

finished frames 4449000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 44%|████▍     | 741623/1666666 [1:19:34<1:11:52, 214.49it/s]

finished frames 4449600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 741733/1666666 [1:19:35<1:11:59, 214.15it/s]

finished frames 4450200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 741843/1666666 [1:19:35<1:11:50, 214.53it/s]

finished frames 4450800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 741931/1666666 [1:19:35<1:11:52, 214.45it/s]

finished frames 4451400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 742041/1666666 [1:19:36<1:13:25, 209.86it/s]

finished frames 4452000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 742129/1666666 [1:19:36<1:12:28, 212.60it/s]

finished frames 4452600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 742239/1666666 [1:19:37<1:12:10, 213.46it/s]

finished frames 4453200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 742327/1666666 [1:19:37<1:12:05, 213.69it/s]

finished frames 4453800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 742437/1666666 [1:19:38<1:13:55, 208.35it/s]

finished frames 4454400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 742524/1666666 [1:19:38<1:13:20, 210.01it/s]

finished frames 4455000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 742634/1666666 [1:19:39<1:12:10, 213.36it/s]

finished frames 4455600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 742744/1666666 [1:19:39<1:12:02, 213.73it/s]

finished frames 4456200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 742832/1666666 [1:19:40<1:12:20, 212.86it/s]

finished frames 4456800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 742920/1666666 [1:19:40<1:12:15, 213.05it/s]

finished frames 4457400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 743029/1666666 [1:19:41<1:16:56, 200.07it/s]

finished frames 4458000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 743138/1666666 [1:19:41<1:13:01, 210.80it/s]

finished frames 4458600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 743226/1666666 [1:19:42<1:12:33, 212.14it/s]

finished frames 4459200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 743336/1666666 [1:19:42<1:12:19, 212.77it/s]

finished frames 4459800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 743424/1666666 [1:19:43<1:12:06, 213.37it/s]

finished frames 4460400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 743534/1666666 [1:19:43<1:12:17, 212.84it/s]

finished frames 4461000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 743644/1666666 [1:19:44<1:12:06, 213.34it/s]

finished frames 4461600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 743732/1666666 [1:19:44<1:12:09, 213.18it/s]

finished frames 4462200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 743842/1666666 [1:19:45<1:12:05, 213.35it/s]

finished frames 4462800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 743930/1666666 [1:19:45<1:12:06, 213.29it/s]

finished frames 4463400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 744040/1666666 [1:19:45<1:13:36, 208.91it/s]

finished frames 4464000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 744128/1666666 [1:19:46<1:12:58, 210.71it/s]

finished frames 4464600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 744237/1666666 [1:19:46<1:13:13, 209.95it/s]

finished frames 4465200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 744323/1666666 [1:19:47<1:14:12, 207.13it/s]

finished frames 4465800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 744432/1666666 [1:19:47<1:12:14, 212.78it/s]

finished frames 4466400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 744542/1666666 [1:19:48<1:11:55, 213.68it/s]

finished frames 4467000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 744630/1666666 [1:19:48<1:12:29, 211.99it/s]

finished frames 4467600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 744740/1666666 [1:19:49<1:12:02, 213.28it/s]

finished frames 4468200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 744828/1666666 [1:19:49<1:12:02, 213.27it/s]

finished frames 4468800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 744938/1666666 [1:19:50<1:11:49, 213.89it/s]

finished frames 4469400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 745026/1666666 [1:19:50<1:13:29, 209.01it/s]

finished frames 4470000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 745135/1666666 [1:19:51<1:11:59, 213.32it/s]

finished frames 4470600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 745223/1666666 [1:19:51<1:11:29, 214.79it/s]

finished frames 4471200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 745333/1666666 [1:19:52<1:11:28, 214.85it/s]

finished frames 4471800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 745443/1666666 [1:19:52<1:11:16, 215.43it/s]

finished frames 4472400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 745531/1666666 [1:19:52<1:11:22, 215.08it/s]

finished frames 4473000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 745641/1666666 [1:19:53<1:11:17, 215.34it/s]

finished frames 4473600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 745729/1666666 [1:19:53<1:11:22, 215.07it/s]

finished frames 4474200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 745841/1666666 [1:19:54<1:11:45, 213.85it/s]

finished frames 4474800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 745929/1666666 [1:19:54<1:12:15, 212.36it/s]

finished frames 4475400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 746035/1666666 [1:19:55<1:16:10, 201.45it/s]

finished frames 4476000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 746141/1666666 [1:19:55<1:13:59, 207.33it/s]

finished frames 4476600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 746229/1666666 [1:19:56<1:12:08, 212.63it/s]

finished frames 4477200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 746339/1666666 [1:19:56<1:12:43, 210.93it/s]

finished frames 4477800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 746427/1666666 [1:19:57<1:12:59, 210.12it/s]

finished frames 4478400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 746533/1666666 [1:19:57<1:15:13, 203.87it/s]

finished frames 4479000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 746638/1666666 [1:19:58<1:14:23, 206.11it/s]

finished frames 4479600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 746722/1666666 [1:19:58<1:14:25, 206.03it/s]

finished frames 4480200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 746829/1666666 [1:19:59<1:13:36, 208.25it/s]

finished frames 4480800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 746934/1666666 [1:19:59<1:16:25, 200.56it/s]

finished frames 4481400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 747039/1666666 [1:20:00<1:15:35, 202.77it/s]

finished frames 4482000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 747123/1666666 [1:20:00<1:14:30, 205.71it/s]

finished frames 4482600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 747228/1666666 [1:20:01<1:13:57, 207.20it/s]

finished frames 4483200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 747333/1666666 [1:20:01<1:13:38, 208.08it/s]

finished frames 4483800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 747438/1666666 [1:20:02<1:13:26, 208.61it/s]

finished frames 4484400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 747522/1666666 [1:20:02<1:14:04, 206.79it/s]

finished frames 4485000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 747627/1666666 [1:20:03<1:16:41, 199.72it/s]

finished frames 4485600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 747733/1666666 [1:20:03<1:16:26, 200.36it/s]

finished frames 4486200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 747839/1666666 [1:20:04<1:13:46, 207.57it/s]

finished frames 4486800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 747924/1666666 [1:20:04<1:13:30, 208.30it/s]

finished frames 4487400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 748029/1666666 [1:20:05<1:17:30, 197.52it/s]

finished frames 4488000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 748137/1666666 [1:20:05<1:13:45, 207.54it/s]

finished frames 4488600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 748223/1666666 [1:20:06<1:13:13, 209.06it/s]

finished frames 4489200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 748332/1666666 [1:20:06<1:12:45, 210.37it/s]

finished frames 4489800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 748441/1666666 [1:20:07<1:12:49, 210.15it/s]

finished frames 4490400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 748528/1666666 [1:20:07<1:12:56, 209.77it/s]

finished frames 4491000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 748636/1666666 [1:20:08<1:12:42, 210.41it/s]

finished frames 4491600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 748724/1666666 [1:20:08<1:12:38, 210.63it/s]

finished frames 4492200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 748834/1666666 [1:20:08<1:12:32, 210.86it/s]

finished frames 4492800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 748922/1666666 [1:20:09<1:12:26, 211.14it/s]

finished frames 4493400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 749032/1666666 [1:20:09<1:13:56, 206.82it/s]

finished frames 4494000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 749140/1666666 [1:20:10<1:12:47, 210.08it/s]

finished frames 4494600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 749225/1666666 [1:20:10<1:13:12, 208.86it/s]

finished frames 4495200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 749331/1666666 [1:20:11<1:13:13, 208.81it/s]

finished frames 4495800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 749437/1666666 [1:20:11<1:13:26, 208.17it/s]

finished frames 4496400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 749524/1666666 [1:20:12<1:12:59, 209.43it/s]

finished frames 4497000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 749630/1666666 [1:20:12<1:12:57, 209.47it/s]

finished frames 4497600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 749736/1666666 [1:20:13<1:13:04, 209.11it/s]

finished frames 4498200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 749842/1666666 [1:20:13<1:13:06, 208.99it/s]

finished frames 4498800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▍     | 749929/1666666 [1:20:14<1:14:35, 204.84it/s]

finished frames 4499400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 750037/1666666 [1:20:14<1:13:47, 207.05it/s]

finished frames 4500000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 750125/1666666 [1:20:15<1:12:31, 210.62it/s]

finished frames 4500600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 750235/1666666 [1:20:15<1:11:49, 212.63it/s]

finished frames 4501200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 750323/1666666 [1:20:16<1:11:33, 213.41it/s]

finished frames 4501800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 750433/1666666 [1:20:16<1:11:21, 213.99it/s]

finished frames 4502400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 750543/1666666 [1:20:17<1:11:23, 213.89it/s]

finished frames 4503000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 750631/1666666 [1:20:17<1:11:35, 213.26it/s]

finished frames 4503600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 750741/1666666 [1:20:18<1:11:21, 213.91it/s]

finished frames 4504200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 750829/1666666 [1:20:18<1:11:30, 213.45it/s]

finished frames 4504800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 750939/1666666 [1:20:18<1:11:13, 214.30it/s]

finished frames 4505400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 751026/1666666 [1:20:19<1:13:09, 208.58it/s]

finished frames 4506000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 751134/1666666 [1:20:19<1:12:52, 209.40it/s]

finished frames 4506600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 751239/1666666 [1:20:20<1:12:59, 209.02it/s]

finished frames 4507200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 751325/1666666 [1:20:20<1:12:51, 209.37it/s]

finished frames 4507800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 751432/1666666 [1:20:21<1:12:44, 209.70it/s]

finished frames 4508400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 751539/1666666 [1:20:21<1:12:42, 209.77it/s]

finished frames 4509000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 751624/1666666 [1:20:22<1:12:51, 209.31it/s]

finished frames 4509600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 751731/1666666 [1:20:22<1:12:47, 209.47it/s]

finished frames 4510200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 751837/1666666 [1:20:23<1:12:52, 209.23it/s]

finished frames 4510800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 751943/1666666 [1:20:23<1:12:42, 209.69it/s]

finished frames 4511400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 752027/1666666 [1:20:24<1:14:31, 204.55it/s]

finished frames 4512000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 752132/1666666 [1:20:24<1:13:31, 207.32it/s]

finished frames 4512600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 752216/1666666 [1:20:25<1:13:16, 208.01it/s]

finished frames 4513200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 752324/1666666 [1:20:25<1:15:40, 201.36it/s]

finished frames 4513800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 752432/1666666 [1:20:26<1:12:15, 210.88it/s]

finished frames 4514400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 752542/1666666 [1:20:26<1:11:28, 213.18it/s]

finished frames 4515000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 752630/1666666 [1:20:27<1:11:22, 213.45it/s]

finished frames 4515600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 752740/1666666 [1:20:27<1:11:16, 213.73it/s]

finished frames 4516200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 752828/1666666 [1:20:28<1:11:02, 214.37it/s]

finished frames 4516800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 752938/1666666 [1:20:28<1:10:39, 215.55it/s]

finished frames 4517400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 753026/1666666 [1:20:28<1:12:19, 210.52it/s]

finished frames 4518000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 753136/1666666 [1:20:29<1:11:12, 213.83it/s]

finished frames 4518600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 753224/1666666 [1:20:29<1:11:32, 212.79it/s]

finished frames 4519200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 753334/1666666 [1:20:30<1:11:13, 213.70it/s]

finished frames 4519800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 753422/1666666 [1:20:30<1:11:25, 213.10it/s]

finished frames 4520400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 753532/1666666 [1:20:31<1:11:28, 212.92it/s]

finished frames 4521000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 753642/1666666 [1:20:31<1:11:27, 212.94it/s]

finished frames 4521600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 753730/1666666 [1:20:32<1:11:19, 213.35it/s]

finished frames 4522200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 753840/1666666 [1:20:32<1:11:50, 211.77it/s]

finished frames 4522800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 753928/1666666 [1:20:33<1:11:59, 211.28it/s]

finished frames 4523400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 754037/1666666 [1:20:33<1:14:15, 204.85it/s]

finished frames 4524000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 754142/1666666 [1:20:34<1:13:41, 206.37it/s]

finished frames 4524600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 754226/1666666 [1:20:34<1:13:33, 206.75it/s]

finished frames 4525200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 754331/1666666 [1:20:35<1:13:13, 207.64it/s]

finished frames 4525800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 754436/1666666 [1:20:35<1:13:18, 207.38it/s]

finished frames 4526400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 754541/1666666 [1:20:36<1:13:21, 207.24it/s]

finished frames 4527000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 754625/1666666 [1:20:36<1:22:30, 184.23it/s]

finished frames 4527600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 754730/1666666 [1:20:37<1:14:48, 203.15it/s]

finished frames 4528200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 754835/1666666 [1:20:37<1:13:46, 206.02it/s]

finished frames 4528800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 754941/1666666 [1:20:38<1:13:09, 207.71it/s]

finished frames 4529400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 755025/1666666 [1:20:38<1:15:13, 201.98it/s]

finished frames 4530000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 755130/1666666 [1:20:39<1:13:43, 206.05it/s]

finished frames 4530600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 755235/1666666 [1:20:39<1:13:34, 206.48it/s]

finished frames 4531200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 755341/1666666 [1:20:40<1:12:51, 208.48it/s]

finished frames 4531800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 755426/1666666 [1:20:40<1:12:41, 208.91it/s]

finished frames 4532400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 755535/1666666 [1:20:41<1:11:17, 213.01it/s]

finished frames 4533000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 755623/1666666 [1:20:41<1:11:05, 213.59it/s]

finished frames 4533600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 755733/1666666 [1:20:42<1:10:59, 213.88it/s]

finished frames 4534200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 755843/1666666 [1:20:42<1:11:59, 210.84it/s]

finished frames 4534800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 755930/1666666 [1:20:42<1:12:14, 210.13it/s]

finished frames 4535400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 756036/1666666 [1:20:43<1:14:02, 205.00it/s]

finished frames 4536000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 756142/1666666 [1:20:43<1:12:44, 208.63it/s]

finished frames 4536600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 756229/1666666 [1:20:44<1:12:15, 209.97it/s]

finished frames 4537200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 756337/1666666 [1:20:44<1:12:08, 210.29it/s]

finished frames 4537800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 756425/1666666 [1:20:45<1:12:10, 210.18it/s]

finished frames 4538400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 756535/1666666 [1:20:45<1:12:15, 209.94it/s]

finished frames 4539000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 756641/1666666 [1:20:46<1:12:20, 209.68it/s]

finished frames 4539600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 756725/1666666 [1:20:46<1:12:48, 208.31it/s]

finished frames 4540200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 756830/1666666 [1:20:47<1:13:04, 207.53it/s]

finished frames 4540800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 756935/1666666 [1:20:47<1:12:47, 208.31it/s]

finished frames 4541400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 757040/1666666 [1:20:48<1:14:37, 203.16it/s]

finished frames 4542000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 757124/1666666 [1:20:48<1:15:59, 199.48it/s]

finished frames 4542600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 757229/1666666 [1:20:49<1:13:34, 206.00it/s]

finished frames 4543200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 757334/1666666 [1:20:49<1:12:59, 207.62it/s]

finished frames 4543800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 757439/1666666 [1:20:50<1:12:58, 207.66it/s]

finished frames 4544400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 757523/1666666 [1:20:50<1:12:57, 207.70it/s]

finished frames 4545000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 757628/1666666 [1:20:51<1:13:05, 207.27it/s]

finished frames 4545600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 757733/1666666 [1:20:51<1:13:05, 207.26it/s]

finished frames 4546200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 757838/1666666 [1:20:52<1:12:57, 207.59it/s]

finished frames 4546800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 757922/1666666 [1:20:52<1:13:04, 207.28it/s]

finished frames 4547400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 758027/1666666 [1:20:53<1:14:47, 202.48it/s]

finished frames 4548000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 758134/1666666 [1:20:53<1:12:50, 207.90it/s]

finished frames 4548600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 758239/1666666 [1:20:54<1:12:40, 208.33it/s]

finished frames 4549200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 45%|████▌     | 758323/1666666 [1:20:54<1:12:44, 208.11it/s]

finished frames 4549800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 758428/1666666 [1:20:55<1:12:40, 208.29it/s]

finished frames 4550400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 758534/1666666 [1:20:55<1:12:42, 208.16it/s]

finished frames 4551000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 758639/1666666 [1:20:56<1:12:36, 208.43it/s]

finished frames 4551600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 758723/1666666 [1:20:56<1:12:37, 208.36it/s]

finished frames 4552200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 758829/1666666 [1:20:56<1:12:23, 209.00it/s]

finished frames 4552800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 758934/1666666 [1:20:57<1:12:34, 208.47it/s]

finished frames 4553400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 759039/1666666 [1:20:57<1:14:08, 204.04it/s]

finished frames 4554000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 759123/1666666 [1:20:58<1:13:06, 206.91it/s]

finished frames 4554600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 759228/1666666 [1:20:58<1:12:48, 207.75it/s]

finished frames 4555200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 759333/1666666 [1:20:59<1:14:48, 202.17it/s]

finished frames 4555800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 759438/1666666 [1:20:59<1:15:05, 201.35it/s]

finished frames 4556400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 759543/1666666 [1:21:00<1:12:58, 207.19it/s]

finished frames 4557000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 759628/1666666 [1:21:00<1:12:25, 208.71it/s]

finished frames 4557600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 759734/1666666 [1:21:01<1:12:23, 208.80it/s]

finished frames 4558200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 759839/1666666 [1:21:01<1:12:31, 208.39it/s]

finished frames 4558800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 759923/1666666 [1:21:02<1:12:42, 207.85it/s]

finished frames 4559400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 760028/1666666 [1:21:02<1:14:18, 203.36it/s]

finished frames 4560000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 760134/1666666 [1:21:03<1:12:42, 207.82it/s]

finished frames 4560600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 760239/1666666 [1:21:03<1:12:36, 208.05it/s]

finished frames 4561200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 760323/1666666 [1:21:04<1:12:40, 207.84it/s]

finished frames 4561800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 760428/1666666 [1:21:04<1:12:39, 207.87it/s]

finished frames 4562400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 760535/1666666 [1:21:05<1:12:10, 209.23it/s]

finished frames 4563000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 760643/1666666 [1:21:05<1:11:58, 209.78it/s]

finished frames 4563600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 760728/1666666 [1:21:06<1:12:03, 209.56it/s]

finished frames 4564200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 760834/1666666 [1:21:06<1:12:15, 208.94it/s]

finished frames 4564800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 760940/1666666 [1:21:07<1:12:05, 209.40it/s]

finished frames 4565400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 761024/1666666 [1:21:07<1:14:06, 203.66it/s]

finished frames 4566000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 761129/1666666 [1:21:08<1:13:14, 206.07it/s]

finished frames 4566600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 761236/1666666 [1:21:08<1:12:12, 208.97it/s]

finished frames 4567200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 761343/1666666 [1:21:09<1:11:54, 209.85it/s]

finished frames 4567800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 761429/1666666 [1:21:09<1:11:58, 209.63it/s]

finished frames 4568400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 761534/1666666 [1:21:10<1:12:19, 208.57it/s]

finished frames 4569000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 761640/1666666 [1:21:10<1:15:16, 200.39it/s]

finished frames 4569600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 761725/1666666 [1:21:11<1:12:51, 207.03it/s]

finished frames 4570200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 761834/1666666 [1:21:11<1:11:12, 211.78it/s]

finished frames 4570800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 761944/1666666 [1:21:12<1:10:48, 212.96it/s]

finished frames 4571400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 762032/1666666 [1:21:12<1:12:27, 208.07it/s]

finished frames 4572000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 762141/1666666 [1:21:12<1:11:08, 211.90it/s]

finished frames 4572600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 762229/1666666 [1:21:13<1:10:47, 212.96it/s]

finished frames 4573200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 762339/1666666 [1:21:13<1:10:34, 213.57it/s]

finished frames 4573800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 762427/1666666 [1:21:14<1:10:44, 213.04it/s]

finished frames 4574400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 762537/1666666 [1:21:14<1:10:52, 212.60it/s]

finished frames 4575000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 762625/1666666 [1:21:15<1:10:45, 212.94it/s]

finished frames 4575600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 762735/1666666 [1:21:15<1:10:40, 213.16it/s]

finished frames 4576200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 762823/1666666 [1:21:16<1:10:46, 212.85it/s]

finished frames 4576800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 762933/1666666 [1:21:16<1:10:42, 213.01it/s]

finished frames 4577400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 763021/1666666 [1:21:17<1:12:33, 207.55it/s]

finished frames 4578000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 763131/1666666 [1:21:17<1:10:53, 212.45it/s]

finished frames 4578600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 763241/1666666 [1:21:18<1:10:08, 214.68it/s]

finished frames 4579200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 763329/1666666 [1:21:18<1:10:02, 214.95it/s]

finished frames 4579800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 763439/1666666 [1:21:19<1:09:54, 215.34it/s]

finished frames 4580400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 763527/1666666 [1:21:19<1:10:00, 215.02it/s]

finished frames 4581000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 763637/1666666 [1:21:20<1:09:51, 215.45it/s]

finished frames 4581600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 763725/1666666 [1:21:20<1:10:05, 214.71it/s]

finished frames 4582200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 763835/1666666 [1:21:20<1:10:03, 214.78it/s]

finished frames 4582800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 763923/1666666 [1:21:21<1:10:17, 214.06it/s]

finished frames 4583400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 764031/1666666 [1:21:21<1:14:55, 200.80it/s]

finished frames 4584000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 764138/1666666 [1:21:22<1:12:20, 207.95it/s]

finished frames 4584600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 764222/1666666 [1:21:22<1:12:27, 207.59it/s]

finished frames 4585200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 764328/1666666 [1:21:23<1:12:23, 207.76it/s]

finished frames 4585800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 764433/1666666 [1:21:23<1:12:24, 207.66it/s]

finished frames 4586400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 764538/1666666 [1:21:24<1:12:31, 207.33it/s]

finished frames 4587000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 764622/1666666 [1:21:24<1:12:25, 207.57it/s]

finished frames 4587600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 764727/1666666 [1:21:25<1:12:29, 207.35it/s]

finished frames 4588200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 764834/1666666 [1:21:25<1:11:57, 208.89it/s]

finished frames 4588800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 764941/1666666 [1:21:26<1:11:54, 209.02it/s]

finished frames 4589400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 765025/1666666 [1:21:26<1:13:42, 203.88it/s]

finished frames 4590000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 765130/1666666 [1:21:27<1:12:32, 207.14it/s]

finished frames 4590600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 765236/1666666 [1:21:27<1:12:05, 208.42it/s]

finished frames 4591200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 765341/1666666 [1:21:28<1:12:11, 208.11it/s]

finished frames 4591800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 765425/1666666 [1:21:28<1:12:10, 208.09it/s]

finished frames 4592400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 765531/1666666 [1:21:29<1:11:56, 208.75it/s]

finished frames 4593000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 765638/1666666 [1:21:29<1:11:47, 209.20it/s]

finished frames 4593600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 765722/1666666 [1:21:30<1:12:12, 207.96it/s]

finished frames 4594200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 765827/1666666 [1:21:30<1:12:15, 207.80it/s]

finished frames 4594800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 765933/1666666 [1:21:31<1:12:03, 208.35it/s]

finished frames 4595400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 766038/1666666 [1:21:31<1:13:35, 203.97it/s]

finished frames 4596000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 766122/1666666 [1:21:31<1:12:58, 205.66it/s]

finished frames 4596600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 766227/1666666 [1:21:32<1:12:23, 207.29it/s]

finished frames 4597200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 766332/1666666 [1:21:33<1:21:02, 185.16it/s]

finished frames 4597800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 766437/1666666 [1:21:33<1:13:44, 203.45it/s]

finished frames 4598400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 766542/1666666 [1:21:34<1:12:26, 207.09it/s]

finished frames 4599000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 766626/1666666 [1:21:34<1:12:10, 207.84it/s]

finished frames 4599600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 766731/1666666 [1:21:34<1:12:27, 207.00it/s]

finished frames 4600200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 766836/1666666 [1:21:35<1:12:19, 207.34it/s]

finished frames 4600800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 766941/1666666 [1:21:35<1:12:03, 208.12it/s]

finished frames 4601400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 767025/1666666 [1:21:36<1:13:36, 203.70it/s]

finished frames 4602000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 767130/1666666 [1:21:36<1:12:25, 207.01it/s]

finished frames 4602600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 767235/1666666 [1:21:37<1:12:11, 207.65it/s]

finished frames 4603200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 767342/1666666 [1:21:37<1:11:46, 208.83it/s]

finished frames 4603800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 767429/1666666 [1:21:38<1:11:20, 210.06it/s]

finished frames 4604400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 767538/1666666 [1:21:38<1:11:20, 210.04it/s]

finished frames 4605000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 767625/1666666 [1:21:39<1:11:17, 210.17it/s]

finished frames 4605600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 767735/1666666 [1:21:39<1:11:03, 210.86it/s]

finished frames 4606200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 767822/1666666 [1:21:40<1:11:06, 210.66it/s]

finished frames 4606800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 767932/1666666 [1:21:40<1:10:01, 213.90it/s]

finished frames 4607400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 768020/1666666 [1:21:41<1:12:23, 206.87it/s]

finished frames 4608000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 768130/1666666 [1:21:41<1:10:30, 212.39it/s]

finished frames 4608600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 768240/1666666 [1:21:42<1:10:01, 213.81it/s]

finished frames 4609200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 768328/1666666 [1:21:42<1:10:14, 213.16it/s]

finished frames 4609800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 768438/1666666 [1:21:43<1:10:09, 213.40it/s]

finished frames 4610400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 768526/1666666 [1:21:43<1:10:15, 213.08it/s]

finished frames 4611000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 768636/1666666 [1:21:44<1:10:34, 212.06it/s]

finished frames 4611600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 768724/1666666 [1:21:44<1:13:29, 203.65it/s]

finished frames 4612200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 768834/1666666 [1:21:45<1:10:34, 212.02it/s]

finished frames 4612800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 768944/1666666 [1:21:45<1:09:44, 214.54it/s]

finished frames 4613400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 769032/1666666 [1:21:45<1:11:18, 209.82it/s]

finished frames 4614000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 769142/1666666 [1:21:46<1:09:29, 215.24it/s]

finished frames 4614600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 769230/1666666 [1:21:46<1:09:16, 215.93it/s]

finished frames 4615200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 769340/1666666 [1:21:47<1:09:08, 216.30it/s]

finished frames 4615800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 769428/1666666 [1:21:47<1:09:10, 216.19it/s]

finished frames 4616400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 769538/1666666 [1:21:48<1:08:59, 216.72it/s]

finished frames 4617000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 769626/1666666 [1:21:48<1:09:04, 216.45it/s]

finished frames 4617600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 769736/1666666 [1:21:49<1:08:55, 216.87it/s]

finished frames 4618200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 769824/1666666 [1:21:49<1:09:21, 215.53it/s]

finished frames 4618800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 769934/1666666 [1:21:50<1:09:37, 214.63it/s]

finished frames 4619400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 770022/1666666 [1:21:50<1:11:59, 207.59it/s]

finished frames 4620000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 770132/1666666 [1:21:51<1:10:17, 212.57it/s]

finished frames 4620600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 770242/1666666 [1:21:51<1:09:47, 214.07it/s]

finished frames 4621200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 770330/1666666 [1:21:52<1:09:50, 213.90it/s]

finished frames 4621800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 770440/1666666 [1:21:52<1:10:09, 212.90it/s]

finished frames 4622400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 770528/1666666 [1:21:52<1:09:59, 213.37it/s]

finished frames 4623000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 770638/1666666 [1:21:53<1:10:02, 213.21it/s]

finished frames 4623600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▌     | 770724/1666666 [1:21:53<1:13:52, 202.15it/s]

finished frames 4624200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▋     | 770834/1666666 [1:21:54<1:11:32, 208.72it/s]

finished frames 4624800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▋     | 770943/1666666 [1:21:54<1:10:46, 210.94it/s]

finished frames 4625400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▋     | 771028/1666666 [1:21:55<1:14:00, 201.71it/s]

finished frames 4626000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▋     | 771135/1666666 [1:21:55<1:11:19, 209.26it/s]

finished frames 4626600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▋     | 771244/1666666 [1:21:56<1:09:40, 214.19it/s]

finished frames 4627200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▋     | 771332/1666666 [1:21:56<1:10:54, 210.46it/s]

finished frames 4627800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▋     | 771442/1666666 [1:21:57<1:10:56, 210.30it/s]

finished frames 4628400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▋     | 771527/1666666 [1:21:57<1:12:27, 205.91it/s]

finished frames 4629000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▋     | 771632/1666666 [1:21:58<1:13:18, 203.47it/s]

finished frames 4629600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▋     | 771737/1666666 [1:21:58<1:12:08, 206.76it/s]

finished frames 4630200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▋     | 771842/1666666 [1:21:59<1:11:59, 207.18it/s]

finished frames 4630800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▋     | 771926/1666666 [1:21:59<1:12:01, 207.06it/s]

finished frames 4631400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▋     | 772031/1666666 [1:22:00<1:14:04, 201.29it/s]

finished frames 4632000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▋     | 772136/1666666 [1:22:00<1:12:01, 207.00it/s]

finished frames 4632600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▋     | 772242/1666666 [1:22:01<1:11:36, 208.18it/s]

finished frames 4633200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▋     | 772326/1666666 [1:22:01<1:11:40, 207.97it/s]

finished frames 4633800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▋     | 772433/1666666 [1:22:02<1:10:59, 209.96it/s]

finished frames 4634400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▋     | 772543/1666666 [1:22:02<1:09:53, 213.21it/s]

finished frames 4635000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▋     | 772631/1666666 [1:22:03<1:09:36, 214.04it/s]

finished frames 4635600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▋     | 772741/1666666 [1:22:03<1:09:56, 213.01it/s]

finished frames 4636200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▋     | 772829/1666666 [1:22:03<1:09:51, 213.26it/s]

finished frames 4636800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▋     | 772939/1666666 [1:22:04<1:09:41, 213.75it/s]

finished frames 4637400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▋     | 773027/1666666 [1:22:04<1:11:20, 208.78it/s]

finished frames 4638000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▋     | 773137/1666666 [1:22:05<1:10:04, 212.53it/s]

finished frames 4638600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▋     | 773225/1666666 [1:22:05<1:09:43, 213.58it/s]

finished frames 4639200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▋     | 773335/1666666 [1:22:06<1:09:14, 215.03it/s]

finished frames 4639800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▋     | 773423/1666666 [1:22:06<1:15:25, 197.40it/s]

finished frames 4640400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▋     | 773533/1666666 [1:22:07<1:10:53, 209.96it/s]

finished frames 4641000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▋     | 773643/1666666 [1:22:07<1:10:33, 210.94it/s]

finished frames 4641600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▋     | 773730/1666666 [1:22:08<1:10:12, 211.99it/s]

finished frames 4642200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▋     | 773840/1666666 [1:22:08<1:09:11, 215.07it/s]

finished frames 4642800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▋     | 773928/1666666 [1:22:09<1:10:04, 212.32it/s]

finished frames 4643400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▋     | 774038/1666666 [1:22:09<1:11:41, 207.50it/s]

finished frames 4644000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▋     | 774124/1666666 [1:22:10<1:10:47, 210.14it/s]

finished frames 4644600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▋     | 774234/1666666 [1:22:10<1:09:46, 213.19it/s]

finished frames 4645200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▋     | 774322/1666666 [1:22:11<1:10:20, 211.45it/s]

finished frames 4645800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▋     | 774431/1666666 [1:22:11<1:10:44, 210.19it/s]

finished frames 4646400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▋     | 774540/1666666 [1:22:12<1:10:37, 210.55it/s]

finished frames 4647000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▋     | 774627/1666666 [1:22:12<1:10:38, 210.44it/s]

finished frames 4647600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▋     | 774737/1666666 [1:22:13<1:10:47, 209.99it/s]

finished frames 4648200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▋     | 774824/1666666 [1:22:13<1:10:45, 210.07it/s]

finished frames 4648800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 46%|████▋     | 774934/1666666 [1:22:13<1:10:42, 210.18it/s]

finished frames 4649400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 775021/1666666 [1:22:14<1:12:53, 203.87it/s]

finished frames 4650000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 775130/1666666 [1:22:14<1:10:04, 212.03it/s]

finished frames 4650600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 775240/1666666 [1:22:15<1:09:47, 212.90it/s]

finished frames 4651200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 775328/1666666 [1:22:15<1:10:14, 211.47it/s]

finished frames 4651800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 775438/1666666 [1:22:16<1:09:42, 213.07it/s]

finished frames 4652400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 775526/1666666 [1:22:16<1:09:41, 213.10it/s]

finished frames 4653000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 775636/1666666 [1:22:17<1:09:21, 214.10it/s]

finished frames 4653600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 775724/1666666 [1:22:17<1:10:08, 211.69it/s]

finished frames 4654200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 775834/1666666 [1:22:18<1:15:22, 196.98it/s]

finished frames 4654800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 775943/1666666 [1:22:18<1:10:22, 210.95it/s]

finished frames 4655400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 776031/1666666 [1:22:19<1:11:07, 208.71it/s]

finished frames 4656000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 776141/1666666 [1:22:19<1:09:15, 214.28it/s]

finished frames 4656600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 776229/1666666 [1:22:20<1:09:29, 213.57it/s]

finished frames 4657200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 776339/1666666 [1:22:20<1:09:22, 213.91it/s]

finished frames 4657800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 776427/1666666 [1:22:21<1:09:29, 213.49it/s]

finished frames 4658400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 776537/1666666 [1:22:21<1:09:20, 213.96it/s]

finished frames 4659000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 776625/1666666 [1:22:22<1:09:26, 213.63it/s]

finished frames 4659600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 776735/1666666 [1:22:22<1:09:24, 213.69it/s]

finished frames 4660200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 776823/1666666 [1:22:22<1:09:13, 214.24it/s]

finished frames 4660800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 776933/1666666 [1:22:23<1:09:28, 213.44it/s]

finished frames 4661400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 777021/1666666 [1:22:23<1:11:35, 207.10it/s]

finished frames 4662000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 777131/1666666 [1:22:24<1:09:32, 213.20it/s]

finished frames 4662600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 777241/1666666 [1:22:24<1:09:15, 214.04it/s]

finished frames 4663200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 777329/1666666 [1:22:25<1:09:04, 214.58it/s]

finished frames 4663800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 777439/1666666 [1:22:25<1:08:49, 215.34it/s]

finished frames 4664400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 777527/1666666 [1:22:26<1:08:52, 215.18it/s]

finished frames 4665000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 777637/1666666 [1:22:26<1:08:52, 215.13it/s]

finished frames 4665600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 777725/1666666 [1:22:27<1:08:40, 215.74it/s]

finished frames 4666200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 777835/1666666 [1:22:27<1:08:52, 215.08it/s]

finished frames 4666800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 777923/1666666 [1:22:28<1:09:55, 211.82it/s]

finished frames 4667400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 778032/1666666 [1:22:28<1:11:43, 206.50it/s]

finished frames 4668000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 778141/1666666 [1:22:29<1:10:22, 210.43it/s]

finished frames 4668600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 778229/1666666 [1:22:29<1:13:25, 201.66it/s]

finished frames 4669200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 778338/1666666 [1:22:30<1:10:44, 209.30it/s]

finished frames 4669800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 778425/1666666 [1:22:30<1:10:18, 210.54it/s]

finished frames 4670400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 778535/1666666 [1:22:31<1:09:51, 211.88it/s]

finished frames 4671000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 778623/1666666 [1:22:31<1:10:00, 211.41it/s]

finished frames 4671600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 778733/1666666 [1:22:31<1:09:48, 211.97it/s]

finished frames 4672200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 778843/1666666 [1:22:32<1:09:44, 212.16it/s]

finished frames 4672800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 778931/1666666 [1:22:32<1:09:51, 211.81it/s]

finished frames 4673400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 779041/1666666 [1:22:33<1:11:26, 207.07it/s]

finished frames 4674000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 779127/1666666 [1:22:33<1:10:27, 209.93it/s]

finished frames 4674600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 779237/1666666 [1:22:34<1:09:45, 212.01it/s]

finished frames 4675200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 779325/1666666 [1:22:34<1:09:53, 211.59it/s]

finished frames 4675800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 779435/1666666 [1:22:35<1:09:50, 211.71it/s]

finished frames 4676400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 779523/1666666 [1:22:35<1:09:57, 211.36it/s]

finished frames 4677000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 779633/1666666 [1:22:36<1:09:46, 211.88it/s]

finished frames 4677600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 779743/1666666 [1:22:36<1:09:46, 211.85it/s]

finished frames 4678200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 779831/1666666 [1:22:37<1:09:48, 211.73it/s]

finished frames 4678800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 779941/1666666 [1:22:37<1:09:50, 211.62it/s]

finished frames 4679400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 780029/1666666 [1:22:38<1:11:23, 206.98it/s]

finished frames 4680000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 780138/1666666 [1:22:38<1:10:20, 210.04it/s]

finished frames 4680600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 780226/1666666 [1:22:39<1:09:55, 211.29it/s]

finished frames 4681200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 780336/1666666 [1:22:39<1:09:43, 211.87it/s]

finished frames 4681800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 780424/1666666 [1:22:40<1:09:50, 211.48it/s]

finished frames 4682400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 780534/1666666 [1:22:40<1:09:33, 212.31it/s]

finished frames 4683000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 780622/1666666 [1:22:40<1:11:46, 205.73it/s]

finished frames 4683600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 780730/1666666 [1:22:41<1:12:04, 204.86it/s]

finished frames 4684200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 780838/1666666 [1:22:42<1:10:33, 209.24it/s]

finished frames 4684800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 780922/1666666 [1:22:42<1:10:48, 208.50it/s]

finished frames 4685400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 781028/1666666 [1:22:42<1:12:26, 203.75it/s]

finished frames 4686000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 781135/1666666 [1:22:43<1:10:49, 208.39it/s]

finished frames 4686600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 781243/1666666 [1:22:43<1:10:18, 209.87it/s]

finished frames 4687200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 781328/1666666 [1:22:44<1:10:25, 209.51it/s]

finished frames 4687800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 781435/1666666 [1:22:44<1:10:19, 209.81it/s]

finished frames 4688400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 781540/1666666 [1:22:45<1:10:30, 209.24it/s]

finished frames 4689000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 781624/1666666 [1:22:45<1:10:44, 208.50it/s]

finished frames 4689600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 781729/1666666 [1:22:46<1:10:58, 207.80it/s]

finished frames 4690200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 781834/1666666 [1:22:46<1:10:47, 208.30it/s]

finished frames 4690800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 781939/1666666 [1:22:47<1:11:03, 207.50it/s]

finished frames 4691400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 782023/1666666 [1:22:47<1:12:35, 203.11it/s]

finished frames 4692000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 782129/1666666 [1:22:48<1:10:45, 208.34it/s]

finished frames 4692600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 782236/1666666 [1:22:48<1:10:20, 209.54it/s]

finished frames 4693200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 782343/1666666 [1:22:49<1:10:12, 209.93it/s]

finished frames 4693800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 782428/1666666 [1:22:49<1:10:17, 209.64it/s]

finished frames 4694400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 782535/1666666 [1:22:50<1:10:07, 210.14it/s]

finished frames 4695000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 782643/1666666 [1:22:50<1:10:15, 209.71it/s]

finished frames 4695600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 782728/1666666 [1:22:51<1:10:12, 209.86it/s]

finished frames 4696200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 782835/1666666 [1:22:51<1:10:10, 209.92it/s]

finished frames 4696800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 782941/1666666 [1:22:52<1:13:26, 200.53it/s]

finished frames 4697400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 783025/1666666 [1:22:52<1:12:57, 201.87it/s]

finished frames 4698000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 783130/1666666 [1:22:53<1:11:11, 206.85it/s]

finished frames 4698600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 783235/1666666 [1:22:53<1:10:47, 208.00it/s]

finished frames 4699200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 783341/1666666 [1:22:54<1:10:27, 208.94it/s]

finished frames 4699800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 783425/1666666 [1:22:54<1:10:41, 208.26it/s]

finished frames 4700400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 783530/1666666 [1:22:55<1:10:51, 207.74it/s]

finished frames 4701000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 783635/1666666 [1:22:55<1:10:38, 208.35it/s]

finished frames 4701600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 783741/1666666 [1:22:56<1:10:24, 208.98it/s]

finished frames 4702200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 783825/1666666 [1:22:56<1:10:25, 208.93it/s]

finished frames 4702800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 783931/1666666 [1:22:56<1:10:24, 208.94it/s]

finished frames 4703400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 784037/1666666 [1:22:57<1:11:46, 204.97it/s]

finished frames 4704000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 784142/1666666 [1:22:57<1:10:48, 207.74it/s]

finished frames 4704600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 784227/1666666 [1:22:58<1:10:34, 208.40it/s]

finished frames 4705200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 784333/1666666 [1:22:58<1:10:26, 208.78it/s]

finished frames 4705800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 784438/1666666 [1:22:59<1:10:23, 208.87it/s]

finished frames 4706400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 784543/1666666 [1:22:59<1:10:27, 208.66it/s]

finished frames 4707000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 784628/1666666 [1:23:00<1:10:24, 208.79it/s]

finished frames 4707600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 784734/1666666 [1:23:00<1:10:19, 209.02it/s]

finished frames 4708200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 784839/1666666 [1:23:01<1:10:15, 209.19it/s]

finished frames 4708800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 784923/1666666 [1:23:01<1:10:20, 208.90it/s]

finished frames 4709400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 785028/1666666 [1:23:02<1:11:54, 204.36it/s]

finished frames 4710000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 785135/1666666 [1:23:02<1:10:20, 208.85it/s]

finished frames 4710600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 785219/1666666 [1:23:03<1:10:19, 208.89it/s]

finished frames 4711200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 785326/1666666 [1:23:03<1:11:34, 205.24it/s]

finished frames 4711800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 785433/1666666 [1:23:04<1:10:27, 208.47it/s]

finished frames 4712400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 785539/1666666 [1:23:04<1:10:14, 209.05it/s]

finished frames 4713000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 785624/1666666 [1:23:05<1:10:18, 208.85it/s]

finished frames 4713600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 785732/1666666 [1:23:05<1:10:18, 208.83it/s]

finished frames 4714200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 785837/1666666 [1:23:06<1:10:31, 208.18it/s]

finished frames 4714800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 785942/1666666 [1:23:06<1:10:30, 208.18it/s]

finished frames 4715400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 786026/1666666 [1:23:07<1:12:24, 202.72it/s]

finished frames 4716000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 786131/1666666 [1:23:07<1:11:13, 206.05it/s]

finished frames 4716600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 786236/1666666 [1:23:08<1:11:03, 206.48it/s]

finished frames 4717200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 786341/1666666 [1:23:08<1:10:26, 208.28it/s]

finished frames 4717800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 786427/1666666 [1:23:09<1:09:14, 211.87it/s]

finished frames 4718400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 786537/1666666 [1:23:09<1:08:50, 213.09it/s]

finished frames 4719000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 786625/1666666 [1:23:09<1:08:50, 213.04it/s]

finished frames 4719600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 786735/1666666 [1:23:10<1:08:38, 213.65it/s]

finished frames 4720200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 786823/1666666 [1:23:10<1:08:39, 213.56it/s]

finished frames 4720800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 786933/1666666 [1:23:11<1:08:45, 213.25it/s]

finished frames 4721400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 787021/1666666 [1:23:11<1:10:49, 207.02it/s]

finished frames 4722000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 787131/1666666 [1:23:12<1:08:58, 212.53it/s]

finished frames 4722600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 787241/1666666 [1:23:12<1:08:56, 212.59it/s]

finished frames 4723200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 787329/1666666 [1:23:13<1:08:44, 213.17it/s]

finished frames 4723800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 787439/1666666 [1:23:13<1:08:24, 214.20it/s]

finished frames 4724400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 787527/1666666 [1:23:14<1:08:35, 213.59it/s]

finished frames 4725000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 787637/1666666 [1:23:14<1:11:42, 204.29it/s]

finished frames 4725600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 787725/1666666 [1:23:15<1:15:29, 194.05it/s]

finished frames 4726200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 787834/1666666 [1:23:15<1:09:41, 210.18it/s]

finished frames 4726800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 787944/1666666 [1:23:16<1:08:43, 213.10it/s]

finished frames 4727400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 788032/1666666 [1:23:16<1:10:20, 208.19it/s]

finished frames 4728000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 788142/1666666 [1:23:17<1:09:00, 212.19it/s]

finished frames 4728600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 788230/1666666 [1:23:17<1:08:55, 212.40it/s]

finished frames 4729200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 788340/1666666 [1:23:18<1:08:40, 213.17it/s]

finished frames 4729800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 788428/1666666 [1:23:18<1:08:43, 213.00it/s]

finished frames 4730400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 788538/1666666 [1:23:19<1:08:33, 213.48it/s]

finished frames 4731000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 788626/1666666 [1:23:19<1:08:27, 213.79it/s]

finished frames 4731600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 788736/1666666 [1:23:19<1:08:27, 213.76it/s]

finished frames 4732200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 788824/1666666 [1:23:20<1:08:50, 212.51it/s]

finished frames 4732800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 788934/1666666 [1:23:20<1:08:43, 212.88it/s]

finished frames 4733400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 789022/1666666 [1:23:21<1:10:43, 206.80it/s]

finished frames 4734000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 789131/1666666 [1:23:21<1:08:52, 212.37it/s]

finished frames 4734600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 789241/1666666 [1:23:22<1:08:32, 213.37it/s]

finished frames 4735200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 789329/1666666 [1:23:22<1:08:16, 214.16it/s]

finished frames 4735800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 789439/1666666 [1:23:23<1:08:21, 213.86it/s]

finished frames 4736400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 789527/1666666 [1:23:23<1:08:20, 213.91it/s]

finished frames 4737000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 789637/1666666 [1:23:24<1:08:20, 213.88it/s]

finished frames 4737600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 789725/1666666 [1:23:24<1:08:21, 213.83it/s]

finished frames 4738200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 789835/1666666 [1:23:25<1:08:09, 214.43it/s]

finished frames 4738800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 789923/1666666 [1:23:25<1:08:17, 213.98it/s]

finished frames 4739400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 790031/1666666 [1:23:26<1:13:25, 198.97it/s]

finished frames 4740000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 790140/1666666 [1:23:26<1:15:38, 193.13it/s]

finished frames 4740600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 790228/1666666 [1:23:27<1:10:13, 207.99it/s]

finished frames 4741200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 790338/1666666 [1:23:27<1:08:39, 212.75it/s]

finished frames 4741800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 790426/1666666 [1:23:27<1:08:31, 213.11it/s]

finished frames 4742400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 790536/1666666 [1:23:28<1:08:33, 212.97it/s]

finished frames 4743000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 790624/1666666 [1:23:28<1:08:25, 213.37it/s]

finished frames 4743600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 790734/1666666 [1:23:29<1:08:25, 213.38it/s]

finished frames 4744200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 790844/1666666 [1:23:29<1:08:17, 213.75it/s]

finished frames 4744800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 790932/1666666 [1:23:30<1:08:22, 213.44it/s]

finished frames 4745400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 791020/1666666 [1:23:30<1:10:34, 206.77it/s]

finished frames 4746000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 791130/1666666 [1:23:31<1:08:31, 212.95it/s]

finished frames 4746600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 791240/1666666 [1:23:31<1:07:58, 214.66it/s]

finished frames 4747200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 791328/1666666 [1:23:32<1:08:08, 214.08it/s]

finished frames 4747800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 791438/1666666 [1:23:32<1:08:17, 213.59it/s]

finished frames 4748400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 791526/1666666 [1:23:33<1:08:08, 214.06it/s]

finished frames 4749000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 47%|████▋     | 791636/1666666 [1:23:33<1:08:14, 213.68it/s]

finished frames 4749600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 791724/1666666 [1:23:34<1:08:00, 214.43it/s]

finished frames 4750200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 791834/1666666 [1:23:34<1:07:48, 215.02it/s]

finished frames 4750800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 791944/1666666 [1:23:35<1:07:45, 215.13it/s]

finished frames 4751400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 792032/1666666 [1:23:35<1:09:25, 209.99it/s]

finished frames 4752000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 792142/1666666 [1:23:36<1:07:59, 214.35it/s]

finished frames 4752600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 792230/1666666 [1:23:36<1:07:45, 215.08it/s]

finished frames 4753200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 792340/1666666 [1:23:36<1:07:59, 214.32it/s]

finished frames 4753800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 792425/1666666 [1:23:37<1:13:37, 197.91it/s]

finished frames 4754400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 792533/1666666 [1:23:38<1:13:43, 197.62it/s]

finished frames 4755000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 792643/1666666 [1:23:38<1:09:21, 210.04it/s]

finished frames 4755600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 792731/1666666 [1:23:38<1:08:23, 212.97it/s]

finished frames 4756200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 792841/1666666 [1:23:39<1:08:01, 214.10it/s]

finished frames 4756800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 792929/1666666 [1:23:39<1:07:53, 214.47it/s]

finished frames 4757400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 793039/1666666 [1:23:40<1:09:30, 209.45it/s]

finished frames 4758000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 793127/1666666 [1:23:40<1:08:38, 212.12it/s]

finished frames 4758600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 793237/1666666 [1:23:41<1:08:11, 213.48it/s]

finished frames 4759200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 793325/1666666 [1:23:41<1:08:10, 213.53it/s]

finished frames 4759800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 793435/1666666 [1:23:42<1:07:58, 214.12it/s]

finished frames 4760400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 793523/1666666 [1:23:42<1:07:40, 215.02it/s]

finished frames 4761000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 793633/1666666 [1:23:43<1:07:43, 214.82it/s]

finished frames 4761600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 793743/1666666 [1:23:43<1:07:34, 215.29it/s]

finished frames 4762200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 793831/1666666 [1:23:44<1:07:47, 214.60it/s]

finished frames 4762800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 793941/1666666 [1:23:44<1:08:09, 213.41it/s]

finished frames 4763400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 794029/1666666 [1:23:45<1:09:21, 209.72it/s]

finished frames 4764000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 794139/1666666 [1:23:45<1:08:25, 212.54it/s]

finished frames 4764600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 794227/1666666 [1:23:45<1:08:21, 212.70it/s]

finished frames 4765200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 794337/1666666 [1:23:46<1:08:04, 213.56it/s]

finished frames 4765800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 794425/1666666 [1:23:46<1:08:11, 213.17it/s]

finished frames 4766400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 794535/1666666 [1:23:47<1:08:08, 213.30it/s]

finished frames 4767000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 794623/1666666 [1:23:47<1:07:52, 214.11it/s]

finished frames 4767600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 794733/1666666 [1:23:48<1:14:03, 196.21it/s]

finished frames 4768200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 794842/1666666 [1:23:48<1:09:12, 209.98it/s]

finished frames 4768800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 794930/1666666 [1:23:49<1:10:46, 205.28it/s]

finished frames 4769400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 795036/1666666 [1:23:49<1:12:18, 200.92it/s]

finished frames 4770000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 795141/1666666 [1:23:50<1:10:07, 207.14it/s]

finished frames 4770600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 795226/1666666 [1:23:50<1:09:41, 208.43it/s]

finished frames 4771200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 795332/1666666 [1:23:51<1:09:34, 208.72it/s]

finished frames 4771800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 795437/1666666 [1:23:51<1:09:36, 208.63it/s]

finished frames 4772400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 795542/1666666 [1:23:52<1:09:33, 208.71it/s]

finished frames 4773000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 795626/1666666 [1:23:52<1:09:46, 208.05it/s]

finished frames 4773600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 795732/1666666 [1:23:53<1:09:52, 207.75it/s]

finished frames 4774200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 795838/1666666 [1:23:53<1:10:56, 204.58it/s]

finished frames 4774800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 795925/1666666 [1:23:54<1:08:54, 210.61it/s]

finished frames 4775400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 796033/1666666 [1:23:54<1:09:41, 208.23it/s]

finished frames 4776000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 796122/1666666 [1:23:55<1:08:12, 212.74it/s]

finished frames 4776600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 796232/1666666 [1:23:55<1:08:47, 210.88it/s]

finished frames 4777200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 796342/1666666 [1:23:56<1:07:05, 216.21it/s]

finished frames 4777800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 796432/1666666 [1:23:56<1:06:13, 219.01it/s]

finished frames 4778400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 796543/1666666 [1:23:57<1:07:31, 214.77it/s]

finished frames 4779000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 796631/1666666 [1:23:57<1:07:52, 213.64it/s]

finished frames 4779600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 796740/1666666 [1:23:57<1:10:37, 205.31it/s]

finished frames 4780200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 796824/1666666 [1:23:58<1:10:34, 205.43it/s]

finished frames 4780800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 796929/1666666 [1:23:58<1:09:52, 207.46it/s]

finished frames 4781400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 797037/1666666 [1:23:59<1:09:44, 207.83it/s]

finished frames 4782000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 797124/1666666 [1:23:59<1:11:27, 202.83it/s]

finished frames 4782600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 797232/1666666 [1:24:00<1:13:00, 198.47it/s]

finished frames 4783200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 797341/1666666 [1:24:00<1:08:29, 211.55it/s]

finished frames 4783800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 797429/1666666 [1:24:01<1:07:40, 214.09it/s]

finished frames 4784400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 797539/1666666 [1:24:01<1:07:24, 214.91it/s]

finished frames 4785000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 797627/1666666 [1:24:02<1:07:21, 215.01it/s]

finished frames 4785600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 797737/1666666 [1:24:02<1:07:11, 215.53it/s]

finished frames 4786200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 797825/1666666 [1:24:03<1:07:16, 215.24it/s]

finished frames 4786800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 797935/1666666 [1:24:03<1:07:17, 215.17it/s]

finished frames 4787400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 798023/1666666 [1:24:04<1:08:36, 211.03it/s]

finished frames 4788000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 798133/1666666 [1:24:04<1:07:38, 214.00it/s]

finished frames 4788600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 798243/1666666 [1:24:05<1:07:31, 214.35it/s]

finished frames 4789200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 798331/1666666 [1:24:05<1:07:30, 214.38it/s]

finished frames 4789800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 798441/1666666 [1:24:06<1:07:19, 214.93it/s]

finished frames 4790400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 798529/1666666 [1:24:06<1:07:36, 214.01it/s]

finished frames 4791000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 798639/1666666 [1:24:06<1:07:28, 214.40it/s]

finished frames 4791600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 798727/1666666 [1:24:07<1:07:30, 214.28it/s]

finished frames 4792200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 798837/1666666 [1:24:07<1:07:16, 215.02it/s]

finished frames 4792800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 798925/1666666 [1:24:08<1:07:20, 214.76it/s]

finished frames 4793400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 799035/1666666 [1:24:08<1:09:03, 209.41it/s]

finished frames 4794000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 799123/1666666 [1:24:09<1:08:09, 212.14it/s]

finished frames 4794600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 799233/1666666 [1:24:09<1:07:52, 213.01it/s]

finished frames 4795200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 799343/1666666 [1:24:10<1:07:40, 213.61it/s]

finished frames 4795800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 799431/1666666 [1:24:10<1:08:12, 211.88it/s]

finished frames 4796400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 799539/1666666 [1:24:11<1:10:34, 204.80it/s]

finished frames 4797000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 799624/1666666 [1:24:11<1:13:15, 197.25it/s]

finished frames 4797600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 799732/1666666 [1:24:12<1:09:10, 208.87it/s]

finished frames 4798200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 799842/1666666 [1:24:12<1:08:17, 211.56it/s]

finished frames 4798800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 799930/1666666 [1:24:13<1:08:24, 211.17it/s]

finished frames 4799400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 800018/1666666 [1:24:13<1:11:59, 200.62it/s]

finished frames 4800000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 800127/1666666 [1:24:14<1:08:54, 209.58it/s]

finished frames 4800600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 800237/1666666 [1:24:14<1:08:27, 210.92it/s]

finished frames 4801200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 800325/1666666 [1:24:15<1:08:24, 211.09it/s]

finished frames 4801800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 800435/1666666 [1:24:15<1:08:22, 211.15it/s]

finished frames 4802400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 800523/1666666 [1:24:15<1:08:21, 211.16it/s]

finished frames 4803000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 800633/1666666 [1:24:16<1:08:19, 211.25it/s]

finished frames 4803600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 800743/1666666 [1:24:17<1:08:16, 211.38it/s]

finished frames 4804200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 800831/1666666 [1:24:17<1:08:17, 211.30it/s]

finished frames 4804800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 800941/1666666 [1:24:17<1:08:16, 211.31it/s]

finished frames 4805400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 801028/1666666 [1:24:18<1:10:59, 203.24it/s]

finished frames 4806000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 801137/1666666 [1:24:18<1:08:21, 211.02it/s]

finished frames 4806600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 801225/1666666 [1:24:19<1:08:11, 211.52it/s]

finished frames 4807200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 801335/1666666 [1:24:19<1:08:10, 211.56it/s]

finished frames 4807800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 801423/1666666 [1:24:20<1:08:10, 211.54it/s]

finished frames 4808400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 801533/1666666 [1:24:20<1:08:11, 211.43it/s]

finished frames 4809000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 801643/1666666 [1:24:21<1:08:00, 211.97it/s]

finished frames 4809600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 801731/1666666 [1:24:21<1:08:04, 211.79it/s]

finished frames 4810200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 801819/1666666 [1:24:22<1:08:24, 210.72it/s]

finished frames 4810800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 801928/1666666 [1:24:22<1:09:28, 207.47it/s]

finished frames 4811400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 802036/1666666 [1:24:23<1:09:53, 206.19it/s]

finished frames 4812000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 802122/1666666 [1:24:23<1:08:32, 210.22it/s]

finished frames 4812600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 802232/1666666 [1:24:24<1:08:11, 211.27it/s]

finished frames 4813200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 802342/1666666 [1:24:24<1:07:59, 211.85it/s]

finished frames 4813800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 802430/1666666 [1:24:25<1:07:26, 213.58it/s]

finished frames 4814400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 802540/1666666 [1:24:25<1:08:00, 211.77it/s]

finished frames 4815000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 802628/1666666 [1:24:25<1:08:21, 210.69it/s]

finished frames 4815600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 802736/1666666 [1:24:26<1:08:24, 210.47it/s]

finished frames 4816200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 802823/1666666 [1:24:26<1:08:33, 210.03it/s]

finished frames 4816800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 802933/1666666 [1:24:27<1:08:14, 210.93it/s]

finished frames 4817400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 803020/1666666 [1:24:27<1:10:41, 203.64it/s]

finished frames 4818000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 803126/1666666 [1:24:28<1:09:10, 208.06it/s]

finished frames 4818600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 803231/1666666 [1:24:28<1:09:24, 207.32it/s]

finished frames 4819200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 803336/1666666 [1:24:29<1:09:30, 207.00it/s]

finished frames 4819800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 803441/1666666 [1:24:29<1:09:25, 207.23it/s]

finished frames 4820400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 803525/1666666 [1:24:30<1:09:37, 206.60it/s]

finished frames 4821000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 803630/1666666 [1:24:30<1:09:35, 206.70it/s]

finished frames 4821600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 803738/1666666 [1:24:31<1:08:17, 210.62it/s]

finished frames 4822200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 803825/1666666 [1:24:31<1:08:08, 211.03it/s]

finished frames 4822800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 803935/1666666 [1:24:32<1:07:56, 211.62it/s]

finished frames 4823400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 804022/1666666 [1:24:32<1:09:37, 206.51it/s]

finished frames 4824000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 804130/1666666 [1:24:33<1:08:23, 210.18it/s]

finished frames 4824600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 804237/1666666 [1:24:33<1:11:47, 200.20it/s]

finished frames 4825200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 804342/1666666 [1:24:34<1:09:37, 206.44it/s]

finished frames 4825800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 804427/1666666 [1:24:34<1:09:10, 207.73it/s]

finished frames 4826400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 804534/1666666 [1:24:35<1:08:51, 208.66it/s]

finished frames 4827000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 804640/1666666 [1:24:35<1:08:42, 209.12it/s]

finished frames 4827600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 804724/1666666 [1:24:36<1:08:58, 208.25it/s]

finished frames 4828200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 804829/1666666 [1:24:36<1:09:10, 207.67it/s]

finished frames 4828800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 804934/1666666 [1:24:37<1:09:19, 207.16it/s]

finished frames 4829400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 805039/1666666 [1:24:37<1:10:51, 202.68it/s]

finished frames 4830000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 805123/1666666 [1:24:38<1:09:43, 205.92it/s]

finished frames 4830600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 805228/1666666 [1:24:38<1:09:18, 207.14it/s]

finished frames 4831200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 805333/1666666 [1:24:39<1:09:16, 207.23it/s]

finished frames 4831800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 805438/1666666 [1:24:39<1:09:05, 207.77it/s]

finished frames 4832400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 805522/1666666 [1:24:39<1:09:05, 207.72it/s]

finished frames 4833000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 805627/1666666 [1:24:40<1:09:35, 206.22it/s]

finished frames 4833600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 805732/1666666 [1:24:40<1:09:26, 206.62it/s]

finished frames 4834200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 805837/1666666 [1:24:41<1:09:08, 207.49it/s]

finished frames 4834800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 805942/1666666 [1:24:41<1:09:10, 207.39it/s]

finished frames 4835400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 806026/1666666 [1:24:42<1:10:58, 202.11it/s]

finished frames 4836000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 806133/1666666 [1:24:42<1:09:02, 207.74it/s]

finished frames 4836600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 806241/1666666 [1:24:43<1:08:26, 209.54it/s]

finished frames 4837200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 806326/1666666 [1:24:43<1:08:21, 209.74it/s]

finished frames 4837800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 806433/1666666 [1:24:44<1:08:23, 209.62it/s]

finished frames 4838400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 806517/1666666 [1:24:44<1:20:54, 177.19it/s]

finished frames 4839000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 806623/1666666 [1:24:45<1:10:37, 202.95it/s]

finished frames 4839600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 806732/1666666 [1:24:45<1:08:34, 208.98it/s]

finished frames 4840200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 806840/1666666 [1:24:46<1:07:40, 211.78it/s]

finished frames 4840800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 806928/1666666 [1:24:46<1:07:41, 211.70it/s]

finished frames 4841400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 807036/1666666 [1:24:47<1:11:03, 201.64it/s]

finished frames 4842000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 807123/1666666 [1:24:47<1:08:12, 210.00it/s]

finished frames 4842600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 807233/1666666 [1:24:48<1:07:17, 212.88it/s]

finished frames 4843200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 807343/1666666 [1:24:48<1:07:28, 212.28it/s]

finished frames 4843800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 807431/1666666 [1:24:49<1:07:54, 210.90it/s]

finished frames 4844400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 807538/1666666 [1:24:49<1:08:28, 209.12it/s]

finished frames 4845000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 807622/1666666 [1:24:50<1:08:53, 207.81it/s]

finished frames 4845600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 807728/1666666 [1:24:50<1:08:52, 207.87it/s]

finished frames 4846200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 807835/1666666 [1:24:51<1:08:36, 208.61it/s]

finished frames 4846800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 807942/1666666 [1:24:51<1:08:19, 209.49it/s]

finished frames 4847400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 808026/1666666 [1:24:52<1:10:19, 203.49it/s]

finished frames 4848000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 808132/1666666 [1:24:52<1:08:49, 207.92it/s]

finished frames 4848600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 808238/1666666 [1:24:53<1:08:39, 208.40it/s]

finished frames 4849200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 48%|████▊     | 808323/1666666 [1:24:53<1:08:32, 208.71it/s]

finished frames 4849800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▊     | 808429/1666666 [1:24:53<1:08:23, 209.14it/s]

finished frames 4850400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▊     | 808535/1666666 [1:24:54<1:08:20, 209.26it/s]

finished frames 4851000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▊     | 808642/1666666 [1:24:55<1:08:15, 209.49it/s]

finished frames 4851600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▊     | 808727/1666666 [1:24:55<1:08:57, 207.36it/s]

finished frames 4852200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▊     | 808833/1666666 [1:24:55<1:08:34, 208.47it/s]

finished frames 4852800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▊     | 808939/1666666 [1:24:56<1:10:32, 202.67it/s]

finished frames 4853400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▊     | 809024/1666666 [1:24:56<1:10:38, 202.33it/s]

finished frames 4854000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▊     | 809129/1666666 [1:24:57<1:08:55, 207.36it/s]

finished frames 4854600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▊     | 809235/1666666 [1:24:57<1:08:31, 208.55it/s]

finished frames 4855200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▊     | 809341/1666666 [1:24:58<1:08:17, 209.23it/s]

finished frames 4855800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▊     | 809425/1666666 [1:24:58<1:08:47, 207.70it/s]

finished frames 4856400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▊     | 809533/1666666 [1:24:59<1:07:46, 210.79it/s]

finished frames 4857000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▊     | 809643/1666666 [1:24:59<1:07:04, 212.93it/s]

finished frames 4857600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▊     | 809731/1666666 [1:25:00<1:07:15, 212.33it/s]

finished frames 4858200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▊     | 809841/1666666 [1:25:00<1:07:05, 212.87it/s]

finished frames 4858800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▊     | 809929/1666666 [1:25:01<1:06:51, 213.57it/s]

finished frames 4859400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▊     | 810039/1666666 [1:25:01<1:08:01, 209.88it/s]

finished frames 4860000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▊     | 810127/1666666 [1:25:02<1:07:15, 212.26it/s]

finished frames 4860600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▊     | 810237/1666666 [1:25:02<1:07:22, 211.88it/s]

finished frames 4861200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▊     | 810325/1666666 [1:25:03<1:07:11, 212.43it/s]

finished frames 4861800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▊     | 810435/1666666 [1:25:03<1:07:14, 212.23it/s]

finished frames 4862400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▊     | 810523/1666666 [1:25:04<1:06:58, 213.06it/s]

finished frames 4863000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▊     | 810633/1666666 [1:25:04<1:06:48, 213.54it/s]

finished frames 4863600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▊     | 810743/1666666 [1:25:05<1:06:33, 214.32it/s]

finished frames 4864200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▊     | 810831/1666666 [1:25:05<1:06:28, 214.59it/s]

finished frames 4864800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▊     | 810941/1666666 [1:25:05<1:06:44, 213.69it/s]

finished frames 4865400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▊     | 811029/1666666 [1:25:06<1:08:33, 208.03it/s]

finished frames 4866000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▊     | 811138/1666666 [1:25:06<1:07:34, 210.99it/s]

finished frames 4866600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▊     | 811226/1666666 [1:25:07<1:07:10, 212.26it/s]

finished frames 4867200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▊     | 811336/1666666 [1:25:07<1:07:05, 212.47it/s]

finished frames 4867800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▊     | 811424/1666666 [1:25:08<1:09:02, 206.43it/s]

finished frames 4868400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▊     | 811533/1666666 [1:25:08<1:07:22, 211.53it/s]

finished frames 4869000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▊     | 811643/1666666 [1:25:09<1:06:58, 212.78it/s]

finished frames 4869600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▊     | 811731/1666666 [1:25:09<1:06:50, 213.20it/s]

finished frames 4870200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▊     | 811841/1666666 [1:25:10<1:06:51, 213.12it/s]

finished frames 4870800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▊     | 811929/1666666 [1:25:10<1:06:39, 213.70it/s]

finished frames 4871400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▊     | 812039/1666666 [1:25:11<1:07:51, 209.88it/s]

finished frames 4872000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▊     | 812127/1666666 [1:25:11<1:07:06, 212.21it/s]

finished frames 4872600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▊     | 812237/1666666 [1:25:12<1:07:00, 212.54it/s]

finished frames 4873200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▊     | 812325/1666666 [1:25:12<1:06:59, 212.57it/s]

finished frames 4873800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▊     | 812435/1666666 [1:25:13<1:06:28, 214.20it/s]

finished frames 4874400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 812523/1666666 [1:25:13<1:06:40, 213.50it/s]

finished frames 4875000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 812633/1666666 [1:25:13<1:06:41, 213.43it/s]

finished frames 4875600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 812743/1666666 [1:25:14<1:06:40, 213.46it/s]

finished frames 4876200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 812831/1666666 [1:25:14<1:06:44, 213.19it/s]

finished frames 4876800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 812941/1666666 [1:25:15<1:06:39, 213.44it/s]

finished frames 4877400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 813029/1666666 [1:25:15<1:08:15, 208.44it/s]

finished frames 4878000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 813137/1666666 [1:25:16<1:07:07, 211.95it/s]

finished frames 4878600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 813225/1666666 [1:25:16<1:06:50, 212.78it/s]

finished frames 4879200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 813335/1666666 [1:25:17<1:06:49, 212.81it/s]

finished frames 4879800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 813423/1666666 [1:25:17<1:06:51, 212.70it/s]

finished frames 4880400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 813533/1666666 [1:25:18<1:06:48, 212.81it/s]

finished frames 4881000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 813621/1666666 [1:25:18<1:15:37, 188.00it/s]

finished frames 4881600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 813730/1666666 [1:25:19<1:07:57, 209.18it/s]

finished frames 4882200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 813840/1666666 [1:25:19<1:06:30, 213.73it/s]

finished frames 4882800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 813928/1666666 [1:25:20<1:06:14, 214.55it/s]

finished frames 4883400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 814038/1666666 [1:25:20<1:07:55, 209.19it/s]

finished frames 4884000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 814125/1666666 [1:25:21<1:07:01, 211.97it/s]

finished frames 4884600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 814235/1666666 [1:25:21<1:06:40, 213.07it/s]

finished frames 4885200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 814323/1666666 [1:25:21<1:06:29, 213.63it/s]

finished frames 4885800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 814433/1666666 [1:25:22<1:06:31, 213.51it/s]

finished frames 4886400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 814543/1666666 [1:25:23<1:06:24, 213.87it/s]

finished frames 4887000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 814631/1666666 [1:25:23<1:06:30, 213.54it/s]

finished frames 4887600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 814741/1666666 [1:25:23<1:06:22, 213.91it/s]

finished frames 4888200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 814829/1666666 [1:25:24<1:06:22, 213.90it/s]

finished frames 4888800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 814939/1666666 [1:25:24<1:06:17, 214.11it/s]

finished frames 4889400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 815027/1666666 [1:25:25<1:07:42, 209.62it/s]

finished frames 4890000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 815137/1666666 [1:25:25<1:06:16, 214.15it/s]

finished frames 4890600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 815225/1666666 [1:25:26<1:06:05, 214.69it/s]

finished frames 4891200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 815335/1666666 [1:25:26<1:05:42, 215.91it/s]

finished frames 4891800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 815423/1666666 [1:25:27<1:05:45, 215.75it/s]

finished frames 4892400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 815533/1666666 [1:25:27<1:05:41, 215.96it/s]

finished frames 4893000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 815643/1666666 [1:25:28<1:05:41, 215.89it/s]

finished frames 4893600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 815731/1666666 [1:25:28<1:05:44, 215.70it/s]

finished frames 4894200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 815841/1666666 [1:25:29<1:05:34, 216.25it/s]

finished frames 4894800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 815929/1666666 [1:25:29<1:05:41, 215.86it/s]

finished frames 4895400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 816017/1666666 [1:25:29<1:19:06, 179.20it/s]

finished frames 4896000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 816128/1666666 [1:25:30<1:07:16, 210.73it/s]

finished frames 4896600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 816238/1666666 [1:25:30<1:05:48, 215.37it/s]

finished frames 4897200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 816326/1666666 [1:25:31<1:05:44, 215.56it/s]

finished frames 4897800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 816436/1666666 [1:25:31<1:05:29, 216.38it/s]

finished frames 4898400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 816524/1666666 [1:25:32<1:05:32, 216.20it/s]

finished frames 4899000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 816634/1666666 [1:25:32<1:05:29, 216.32it/s]

finished frames 4899600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 816744/1666666 [1:25:33<1:05:35, 215.98it/s]

finished frames 4900200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 816832/1666666 [1:25:33<1:05:41, 215.64it/s]

finished frames 4900800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 816942/1666666 [1:25:34<1:05:33, 216.00it/s]

finished frames 4901400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 817030/1666666 [1:25:34<1:06:43, 212.24it/s]

finished frames 4902000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 817140/1666666 [1:25:35<1:05:46, 215.24it/s]

finished frames 4902600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 817228/1666666 [1:25:35<1:05:38, 215.65it/s]

finished frames 4903200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 817338/1666666 [1:25:36<1:05:25, 216.34it/s]

finished frames 4903800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 817426/1666666 [1:25:36<1:05:31, 216.00it/s]

finished frames 4904400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 817536/1666666 [1:25:37<1:05:42, 215.39it/s]

finished frames 4905000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 817624/1666666 [1:25:37<1:05:34, 215.81it/s]

finished frames 4905600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 817734/1666666 [1:25:37<1:05:39, 215.49it/s]

finished frames 4906200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 817844/1666666 [1:25:38<1:05:42, 215.31it/s]

finished frames 4906800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 817932/1666666 [1:25:38<1:05:33, 215.75it/s]

finished frames 4907400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 818042/1666666 [1:25:39<1:06:40, 212.13it/s]

finished frames 4908000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 818130/1666666 [1:25:39<1:07:01, 211.02it/s]

finished frames 4908600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 818237/1666666 [1:25:40<1:07:36, 209.17it/s]

finished frames 4909200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 818342/1666666 [1:25:40<1:07:41, 208.89it/s]

finished frames 4909800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 818426/1666666 [1:25:41<1:13:41, 191.85it/s]

finished frames 4910400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 818530/1666666 [1:25:41<1:09:37, 203.04it/s]

finished frames 4911000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 818635/1666666 [1:25:42<1:08:48, 205.43it/s]

finished frames 4911600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 818740/1666666 [1:25:42<1:08:38, 205.87it/s]

finished frames 4912200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 818824/1666666 [1:25:43<1:10:18, 200.96it/s]

finished frames 4912800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 818929/1666666 [1:25:43<1:09:49, 202.37it/s]

finished frames 4913400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 819034/1666666 [1:25:44<1:10:58, 199.03it/s]

finished frames 4914000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 819137/1666666 [1:25:44<1:08:53, 205.06it/s]

finished frames 4914600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 819242/1666666 [1:25:45<1:07:52, 208.06it/s]

finished frames 4915200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 819326/1666666 [1:25:45<1:07:50, 208.17it/s]

finished frames 4915800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 819431/1666666 [1:25:46<1:07:41, 208.63it/s]

finished frames 4916400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 819539/1666666 [1:25:46<1:07:16, 209.89it/s]

finished frames 4917000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 819623/1666666 [1:25:47<1:07:41, 208.57it/s]

finished frames 4917600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 819731/1666666 [1:25:47<1:07:12, 210.02it/s]

finished frames 4918200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 819841/1666666 [1:25:48<1:07:10, 210.11it/s]

finished frames 4918800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 819928/1666666 [1:25:48<1:07:17, 209.70it/s]

finished frames 4919400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 820035/1666666 [1:25:49<1:08:04, 207.30it/s]

finished frames 4920000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 820123/1666666 [1:25:49<1:06:26, 212.36it/s]

finished frames 4920600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 820233/1666666 [1:25:49<1:05:57, 213.88it/s]

finished frames 4921200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 820343/1666666 [1:25:50<1:06:10, 213.16it/s]

finished frames 4921800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 820431/1666666 [1:25:50<1:05:57, 213.81it/s]

finished frames 4922400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 820541/1666666 [1:25:51<1:05:50, 214.16it/s]

finished frames 4923000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 820629/1666666 [1:25:51<1:05:53, 214.00it/s]

finished frames 4923600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 820739/1666666 [1:25:52<1:05:48, 214.22it/s]

finished frames 4924200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 820827/1666666 [1:25:52<1:09:34, 202.62it/s]

finished frames 4924800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 820936/1666666 [1:25:53<1:09:20, 203.30it/s]

finished frames 4925400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 821023/1666666 [1:25:53<1:07:58, 207.33it/s]

finished frames 4926000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 821136/1666666 [1:25:54<1:05:27, 215.27it/s]

finished frames 4926600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 821225/1666666 [1:25:54<1:04:46, 217.51it/s]

finished frames 4927200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 821335/1666666 [1:25:55<1:06:33, 211.68it/s]

finished frames 4927800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 821423/1666666 [1:25:55<1:06:33, 211.65it/s]

finished frames 4928400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 821533/1666666 [1:25:56<1:05:13, 215.93it/s]

finished frames 4929000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 821623/1666666 [1:25:56<1:04:21, 218.83it/s]

finished frames 4929600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 821735/1666666 [1:25:57<1:04:42, 217.62it/s]

finished frames 4930200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 821823/1666666 [1:25:57<1:06:00, 213.33it/s]

finished frames 4930800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 821932/1666666 [1:25:58<1:07:52, 207.43it/s]

finished frames 4931400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 822040/1666666 [1:25:58<1:08:54, 204.28it/s]

finished frames 4932000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 822127/1666666 [1:25:58<1:07:21, 208.98it/s]

finished frames 4932600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 822232/1666666 [1:25:59<1:07:24, 208.78it/s]

finished frames 4933200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 822337/1666666 [1:25:59<1:09:26, 202.66it/s]

finished frames 4933800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 822443/1666666 [1:26:00<1:07:45, 207.65it/s]

finished frames 4934400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 822527/1666666 [1:26:00<1:07:32, 208.29it/s]

finished frames 4935000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 822633/1666666 [1:26:01<1:07:21, 208.82it/s]

finished frames 4935600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 822740/1666666 [1:26:01<1:06:52, 210.31it/s]

finished frames 4936200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 822828/1666666 [1:26:02<1:06:14, 212.30it/s]

finished frames 4936800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 822938/1666666 [1:26:02<1:05:47, 213.71it/s]

finished frames 4937400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 823026/1666666 [1:26:03<1:07:22, 208.71it/s]

finished frames 4938000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 823136/1666666 [1:26:03<1:11:45, 195.90it/s]

finished frames 4938600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 823223/1666666 [1:26:04<1:07:19, 208.81it/s]

finished frames 4939200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 823331/1666666 [1:26:04<1:07:27, 208.35it/s]

finished frames 4939800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 823441/1666666 [1:26:05<1:06:04, 212.71it/s]

finished frames 4940400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 823529/1666666 [1:26:05<1:05:50, 213.44it/s]

finished frames 4941000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 823639/1666666 [1:26:06<1:05:29, 214.56it/s]

finished frames 4941600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 823727/1666666 [1:26:06<1:05:27, 214.60it/s]

finished frames 4942200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 823837/1666666 [1:26:07<1:05:38, 214.02it/s]

finished frames 4942800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 823925/1666666 [1:26:07<1:05:45, 213.60it/s]

finished frames 4943400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 824035/1666666 [1:26:08<1:07:01, 209.54it/s]

finished frames 4944000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 824123/1666666 [1:26:08<1:06:38, 210.71it/s]

finished frames 4944600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 824233/1666666 [1:26:09<1:06:34, 210.92it/s]

finished frames 4945200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 824343/1666666 [1:26:09<1:06:29, 211.13it/s]

finished frames 4945800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 824430/1666666 [1:26:09<1:06:34, 210.83it/s]

finished frames 4946400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 824540/1666666 [1:26:10<1:06:24, 211.37it/s]

finished frames 4947000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 824628/1666666 [1:26:10<1:06:14, 211.86it/s]

finished frames 4947600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 824738/1666666 [1:26:11<1:06:20, 211.52it/s]

finished frames 4948200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 824826/1666666 [1:26:11<1:06:25, 211.22it/s]

finished frames 4948800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 49%|████▉     | 824936/1666666 [1:26:12<1:06:29, 210.99it/s]

finished frames 4949400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 825023/1666666 [1:26:12<1:08:03, 206.09it/s]

finished frames 4950000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 825132/1666666 [1:26:13<1:06:33, 210.73it/s]

finished frames 4950600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 825242/1666666 [1:26:13<1:06:30, 210.88it/s]

finished frames 4951200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 825329/1666666 [1:26:14<1:06:31, 210.77it/s]

finished frames 4951800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 825439/1666666 [1:26:14<1:06:21, 211.26it/s]

finished frames 4952400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 825527/1666666 [1:26:15<1:10:14, 199.58it/s]

finished frames 4953000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 825635/1666666 [1:26:15<1:11:36, 195.73it/s]

finished frames 4953600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 825722/1666666 [1:26:16<1:07:41, 207.05it/s]

finished frames 4954200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 825830/1666666 [1:26:16<1:06:37, 210.31it/s]

finished frames 4954800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 825940/1666666 [1:26:17<1:06:18, 211.29it/s]

finished frames 4955400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 826027/1666666 [1:26:17<1:07:51, 206.45it/s]

finished frames 4956000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 826135/1666666 [1:26:18<1:06:37, 210.28it/s]

finished frames 4956600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 826223/1666666 [1:26:18<1:06:29, 210.67it/s]

finished frames 4957200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 826333/1666666 [1:26:19<1:06:35, 210.30it/s]

finished frames 4957800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 826443/1666666 [1:26:19<1:06:20, 211.07it/s]

finished frames 4958400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 826531/1666666 [1:26:20<1:06:13, 211.44it/s]

finished frames 4959000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 826641/1666666 [1:26:20<1:06:11, 211.53it/s]

finished frames 4959600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 826729/1666666 [1:26:20<1:06:19, 211.04it/s]

finished frames 4960200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 826839/1666666 [1:26:21<1:06:17, 211.15it/s]

finished frames 4960800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 826927/1666666 [1:26:21<1:06:17, 211.13it/s]

finished frames 4961400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 827035/1666666 [1:26:22<1:08:09, 205.31it/s]

finished frames 4962000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 827143/1666666 [1:26:22<1:06:40, 209.84it/s]

finished frames 4962600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 827231/1666666 [1:26:23<1:06:16, 211.10it/s]

finished frames 4963200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 827341/1666666 [1:26:23<1:06:17, 210.99it/s]

finished frames 4963800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 827429/1666666 [1:26:24<1:06:23, 210.69it/s]

finished frames 4964400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 827539/1666666 [1:26:24<1:06:14, 211.12it/s]

finished frames 4965000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 827627/1666666 [1:26:25<1:06:28, 210.37it/s]

finished frames 4965600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 827737/1666666 [1:26:25<1:06:10, 211.27it/s]

finished frames 4966200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 827825/1666666 [1:26:26<1:06:25, 210.46it/s]

finished frames 4966800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 827935/1666666 [1:26:26<1:06:25, 210.46it/s]

finished frames 4967400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 828040/1666666 [1:26:27<1:10:03, 199.49it/s]

finished frames 4968000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 828126/1666666 [1:26:27<1:07:25, 207.25it/s]

finished frames 4968600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 828234/1666666 [1:26:28<1:06:39, 209.65it/s]

finished frames 4969200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 828343/1666666 [1:26:28<1:06:13, 210.99it/s]

finished frames 4969800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 828430/1666666 [1:26:29<1:06:18, 210.67it/s]

finished frames 4970400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 828540/1666666 [1:26:29<1:06:10, 211.11it/s]

finished frames 4971000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 828628/1666666 [1:26:30<1:06:19, 210.60it/s]

finished frames 4971600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 828738/1666666 [1:26:30<1:06:14, 210.83it/s]

finished frames 4972200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 828826/1666666 [1:26:30<1:06:23, 210.35it/s]

finished frames 4972800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 828936/1666666 [1:26:31<1:06:18, 210.55it/s]

finished frames 4973400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 829022/1666666 [1:26:31<1:08:06, 204.96it/s]

finished frames 4974000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 829129/1666666 [1:26:32<1:06:50, 208.85it/s]

finished frames 4974600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 829237/1666666 [1:26:32<1:06:17, 210.54it/s]

finished frames 4975200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 829324/1666666 [1:26:33<1:06:09, 210.92it/s]

finished frames 4975800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 829434/1666666 [1:26:33<1:06:10, 210.89it/s]

finished frames 4976400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 829543/1666666 [1:26:34<1:05:57, 211.55it/s]

finished frames 4977000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 829631/1666666 [1:26:34<1:06:06, 211.04it/s]

finished frames 4977600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 829741/1666666 [1:26:35<1:06:13, 210.63it/s]

finished frames 4978200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 829828/1666666 [1:26:35<1:06:19, 210.29it/s]

finished frames 4978800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 829937/1666666 [1:26:36<1:06:18, 210.31it/s]

finished frames 4979400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 830023/1666666 [1:26:36<1:07:46, 205.74it/s]

finished frames 4980000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 830131/1666666 [1:26:37<1:06:36, 209.29it/s]

finished frames 4980600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 830216/1666666 [1:26:37<1:12:22, 192.62it/s]

finished frames 4981200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 830324/1666666 [1:26:38<1:07:10, 207.53it/s]

finished frames 4981800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 830432/1666666 [1:26:38<1:06:20, 210.11it/s]

finished frames 4982400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 830541/1666666 [1:26:39<1:06:07, 210.72it/s]

finished frames 4983000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 830626/1666666 [1:26:39<1:06:49, 208.53it/s]

finished frames 4983600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 830731/1666666 [1:26:40<1:07:21, 206.83it/s]

finished frames 4984200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 830837/1666666 [1:26:40<1:07:06, 207.59it/s]

finished frames 4984800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 830943/1666666 [1:26:41<1:06:45, 208.65it/s]

finished frames 4985400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 831027/1666666 [1:26:41<1:07:50, 205.31it/s]

finished frames 4986000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 831134/1666666 [1:26:42<1:06:39, 208.91it/s]

finished frames 4986600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 831240/1666666 [1:26:42<1:06:36, 209.03it/s]

finished frames 4987200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 831325/1666666 [1:26:42<1:06:34, 209.12it/s]

finished frames 4987800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 831433/1666666 [1:26:43<1:06:22, 209.74it/s]

finished frames 4988400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 831539/1666666 [1:26:44<1:06:16, 210.00it/s]

finished frames 4989000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 831626/1666666 [1:26:44<1:06:21, 209.71it/s]

finished frames 4989600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 831734/1666666 [1:26:44<1:06:17, 209.94it/s]

finished frames 4990200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 831842/1666666 [1:26:45<1:05:40, 211.85it/s]

finished frames 4990800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 831930/1666666 [1:26:45<1:05:30, 212.39it/s]

finished frames 4991400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 832040/1666666 [1:26:46<1:06:47, 208.25it/s]

finished frames 4992000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 832128/1666666 [1:26:46<1:05:29, 212.38it/s]

finished frames 4992600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 832238/1666666 [1:26:47<1:05:13, 213.22it/s]

finished frames 4993200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 832326/1666666 [1:26:47<1:05:16, 213.04it/s]

finished frames 4993800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 832436/1666666 [1:26:48<1:05:16, 213.01it/s]

finished frames 4994400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 832524/1666666 [1:26:48<1:05:21, 212.72it/s]

finished frames 4995000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 832632/1666666 [1:26:49<1:10:00, 198.55it/s]

finished frames 4995600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 832738/1666666 [1:26:49<1:07:03, 207.28it/s]

finished frames 4996200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 832822/1666666 [1:26:50<1:06:59, 207.47it/s]

finished frames 4996800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 832928/1666666 [1:26:50<1:06:40, 208.42it/s]

finished frames 4997400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 833033/1666666 [1:26:51<1:08:17, 203.47it/s]

finished frames 4998000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 833138/1666666 [1:26:51<1:07:06, 207.00it/s]

finished frames 4998600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|████▉     | 833223/1666666 [1:26:52<1:06:22, 209.26it/s]

finished frames 4999200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 833333/1666666 [1:26:52<1:05:25, 212.29it/s]

finished frames 4999800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 833443/1666666 [1:26:53<1:04:53, 214.00it/s]

finished frames 5000400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 833531/1666666 [1:26:53<1:04:50, 214.14it/s]

finished frames 5001000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 833641/1666666 [1:26:54<1:04:44, 214.43it/s]

finished frames 5001600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 833729/1666666 [1:26:54<1:04:30, 215.19it/s]

finished frames 5002200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 833839/1666666 [1:26:54<1:04:15, 215.99it/s]

finished frames 5002800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 833927/1666666 [1:26:55<1:04:31, 215.07it/s]

finished frames 5003400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 834037/1666666 [1:26:55<1:05:31, 211.78it/s]

finished frames 5004000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 834125/1666666 [1:26:56<1:04:47, 214.17it/s]

finished frames 5004600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 834235/1666666 [1:26:56<1:04:31, 215.02it/s]

finished frames 5005200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 834323/1666666 [1:26:57<1:04:29, 215.12it/s]

finished frames 5005800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 834433/1666666 [1:26:57<1:04:26, 215.27it/s]

finished frames 5006400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 834543/1666666 [1:26:58<1:04:36, 214.69it/s]

finished frames 5007000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 834631/1666666 [1:26:58<1:04:23, 215.36it/s]

finished frames 5007600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 834741/1666666 [1:26:59<1:04:15, 215.79it/s]

finished frames 5008200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 834829/1666666 [1:26:59<1:04:25, 215.17it/s]

finished frames 5008800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 834939/1666666 [1:27:00<1:04:13, 215.82it/s]

finished frames 5009400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 835027/1666666 [1:27:00<1:05:43, 210.89it/s]

finished frames 5010000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 835137/1666666 [1:27:01<1:06:33, 208.20it/s]

finished frames 5010600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 835225/1666666 [1:27:01<1:04:59, 213.20it/s]

finished frames 5011200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 835335/1666666 [1:27:01<1:04:28, 214.91it/s]

finished frames 5011800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 835423/1666666 [1:27:02<1:04:30, 214.75it/s]

finished frames 5012400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 835533/1666666 [1:27:02<1:04:33, 214.54it/s]

finished frames 5013000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 835643/1666666 [1:27:03<1:04:22, 215.15it/s]

finished frames 5013600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 835731/1666666 [1:27:03<1:04:32, 214.56it/s]

finished frames 5014200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 835841/1666666 [1:27:04<1:04:24, 214.99it/s]

finished frames 5014800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 835929/1666666 [1:27:04<1:04:29, 214.71it/s]

finished frames 5015400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 836039/1666666 [1:27:05<1:06:20, 208.67it/s]

finished frames 5016000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 836127/1666666 [1:27:05<1:05:10, 212.39it/s]

finished frames 5016600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 836237/1666666 [1:27:06<1:04:50, 213.44it/s]

finished frames 5017200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 836325/1666666 [1:27:06<1:04:46, 213.67it/s]

finished frames 5017800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 836435/1666666 [1:27:07<1:04:49, 213.45it/s]

finished frames 5018400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 836523/1666666 [1:27:07<1:05:00, 212.85it/s]

finished frames 5019000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 836633/1666666 [1:27:08<1:04:45, 213.61it/s]

finished frames 5019600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 836743/1666666 [1:27:08<1:04:35, 214.17it/s]

finished frames 5020200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 836831/1666666 [1:27:09<1:04:44, 213.60it/s]

finished frames 5020800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 836941/1666666 [1:27:09<1:04:39, 213.87it/s]

finished frames 5021400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 837029/1666666 [1:27:09<1:05:57, 209.64it/s]

finished frames 5022000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 837139/1666666 [1:27:10<1:04:48, 213.34it/s]

finished frames 5022600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 837227/1666666 [1:27:10<1:05:07, 212.29it/s]

finished frames 5023200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 837337/1666666 [1:27:11<1:05:35, 210.72it/s]

finished frames 5023800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 837425/1666666 [1:27:11<1:05:32, 210.85it/s]

finished frames 5024400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 837534/1666666 [1:27:12<1:07:23, 205.05it/s]

finished frames 5025000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 837642/1666666 [1:27:12<1:05:56, 209.55it/s]

finished frames 5025600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 837727/1666666 [1:27:13<1:05:56, 209.49it/s]

finished frames 5026200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 837832/1666666 [1:27:13<1:06:30, 207.69it/s]

finished frames 5026800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 837937/1666666 [1:27:14<1:06:39, 207.20it/s]

finished frames 5027400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 838021/1666666 [1:27:14<1:08:53, 200.49it/s]

finished frames 5028000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 838126/1666666 [1:27:15<1:06:40, 207.10it/s]

finished frames 5028600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 838231/1666666 [1:27:15<1:06:18, 208.23it/s]

finished frames 5029200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 838336/1666666 [1:27:16<1:06:09, 208.65it/s]

finished frames 5029800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 838441/1666666 [1:27:16<1:06:09, 208.63it/s]

finished frames 5030400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 838525/1666666 [1:27:17<1:06:19, 208.08it/s]

finished frames 5031000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 838630/1666666 [1:27:17<1:06:19, 208.09it/s]

finished frames 5031600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 838735/1666666 [1:27:18<1:06:12, 208.42it/s]

finished frames 5032200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 838840/1666666 [1:27:18<1:06:10, 208.52it/s]

finished frames 5032800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 838925/1666666 [1:27:19<1:06:05, 208.72it/s]

finished frames 5033400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 839031/1666666 [1:27:19<1:07:37, 203.99it/s]

finished frames 5034000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 839137/1666666 [1:27:20<1:06:27, 207.55it/s]

finished frames 5034600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 839243/1666666 [1:27:20<1:06:07, 208.55it/s]

finished frames 5035200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 839327/1666666 [1:27:21<1:06:05, 208.66it/s]

finished frames 5035800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 839432/1666666 [1:27:21<1:06:05, 208.60it/s]

finished frames 5036400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 839537/1666666 [1:27:22<1:06:01, 208.77it/s]

finished frames 5037000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 839643/1666666 [1:27:22<1:05:52, 209.23it/s]

finished frames 5037600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 839727/1666666 [1:27:22<1:08:56, 199.92it/s]

finished frames 5038200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 839834/1666666 [1:27:23<1:06:21, 207.68it/s]

finished frames 5038800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 839939/1666666 [1:27:23<1:06:00, 208.76it/s]

finished frames 5039400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 840023/1666666 [1:27:24<1:07:28, 204.19it/s]

finished frames 5040000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 840128/1666666 [1:27:24<1:06:27, 207.30it/s]

finished frames 5040600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 840233/1666666 [1:27:25<1:06:08, 208.26it/s]

finished frames 5041200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 840338/1666666 [1:27:25<1:06:02, 208.54it/s]

finished frames 5041800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 840422/1666666 [1:27:26<1:06:07, 208.25it/s]

finished frames 5042400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 840528/1666666 [1:27:26<1:05:52, 209.00it/s]

finished frames 5043000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 840635/1666666 [1:27:27<1:05:46, 209.29it/s]

finished frames 5043600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 840742/1666666 [1:27:27<1:05:52, 208.96it/s]

finished frames 5044200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 840827/1666666 [1:27:28<1:07:20, 204.38it/s]

finished frames 5044800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 840937/1666666 [1:27:28<1:04:53, 212.09it/s]

finished frames 5045400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 841025/1666666 [1:27:29<1:06:07, 208.10it/s]

finished frames 5046000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 841134/1666666 [1:27:29<1:04:58, 211.78it/s]

finished frames 5046600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 841244/1666666 [1:27:30<1:04:31, 213.22it/s]

finished frames 5047200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 841332/1666666 [1:27:30<1:04:49, 212.22it/s]

finished frames 5047800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 841442/1666666 [1:27:31<1:04:49, 212.15it/s]

finished frames 5048400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 841530/1666666 [1:27:31<1:04:49, 212.16it/s]

finished frames 5049000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 50%|█████     | 841640/1666666 [1:27:32<1:04:37, 212.77it/s]

finished frames 5049600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 841728/1666666 [1:27:32<1:04:18, 213.82it/s]

finished frames 5050200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 841838/1666666 [1:27:33<1:04:24, 213.42it/s]

finished frames 5050800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 841926/1666666 [1:27:33<1:04:17, 213.78it/s]

finished frames 5051400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 842036/1666666 [1:27:33<1:05:32, 209.70it/s]

finished frames 5052000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 842124/1666666 [1:27:34<1:04:18, 213.72it/s]

finished frames 5052600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 842232/1666666 [1:27:34<1:07:27, 203.71it/s]

finished frames 5053200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 842340/1666666 [1:27:35<1:05:50, 208.65it/s]

finished frames 5053800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 842425/1666666 [1:27:35<1:05:52, 208.53it/s]

finished frames 5054400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 842530/1666666 [1:27:36<1:05:57, 208.24it/s]

finished frames 5055000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 842636/1666666 [1:27:36<1:05:47, 208.74it/s]

finished frames 5055600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 842742/1666666 [1:27:37<1:05:43, 208.92it/s]

finished frames 5056200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 842827/1666666 [1:27:37<1:05:57, 208.15it/s]

finished frames 5056800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 842932/1666666 [1:27:38<1:06:15, 207.20it/s]

finished frames 5057400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 843039/1666666 [1:27:38<1:06:36, 206.07it/s]

finished frames 5058000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 843126/1666666 [1:27:39<1:04:40, 212.21it/s]

finished frames 5058600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 843236/1666666 [1:27:39<1:04:19, 213.35it/s]

finished frames 5059200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 843324/1666666 [1:27:40<1:04:16, 213.49it/s]

finished frames 5059800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 843434/1666666 [1:27:40<1:04:35, 212.43it/s]

finished frames 5060400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 843522/1666666 [1:27:41<1:04:38, 212.23it/s]

finished frames 5061000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 843629/1666666 [1:27:41<1:05:35, 209.12it/s]

finished frames 5061600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 843736/1666666 [1:27:42<1:05:06, 210.67it/s]

finished frames 5062200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 843822/1666666 [1:27:42<1:05:39, 208.89it/s]

finished frames 5062800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 843928/1666666 [1:27:43<1:05:56, 207.92it/s]

finished frames 5063400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 844033/1666666 [1:27:43<1:07:10, 204.11it/s]

finished frames 5064000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 844138/1666666 [1:27:44<1:06:07, 207.32it/s]

finished frames 5064600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 844222/1666666 [1:27:44<1:06:09, 207.19it/s]

finished frames 5065200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 844327/1666666 [1:27:44<1:06:11, 207.03it/s]

finished frames 5065800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 844432/1666666 [1:27:45<1:08:49, 199.11it/s]

finished frames 5066400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 844516/1666666 [1:27:45<1:12:38, 188.64it/s]

finished frames 5067000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 844626/1666666 [1:27:46<1:05:34, 208.93it/s]

finished frames 5067600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 844736/1666666 [1:27:46<1:04:34, 212.12it/s]

finished frames 5068200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 844824/1666666 [1:27:47<1:04:40, 211.78it/s]

finished frames 5068800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 844934/1666666 [1:27:47<1:04:25, 212.56it/s]

finished frames 5069400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 845022/1666666 [1:27:48<1:06:25, 206.17it/s]

finished frames 5070000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 845132/1666666 [1:27:48<1:04:50, 211.15it/s]

finished frames 5070600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 845242/1666666 [1:27:49<1:04:28, 212.31it/s]

finished frames 5071200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 845330/1666666 [1:27:49<1:04:40, 211.68it/s]

finished frames 5071800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 845440/1666666 [1:27:50<1:04:30, 212.18it/s]

finished frames 5072400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 845528/1666666 [1:27:50<1:04:26, 212.35it/s]

finished frames 5073000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 845638/1666666 [1:27:51<1:04:29, 212.16it/s]

finished frames 5073600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 845726/1666666 [1:27:51<1:04:36, 211.79it/s]

finished frames 5074200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 845836/1666666 [1:27:52<1:04:21, 212.59it/s]

finished frames 5074800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 845924/1666666 [1:27:52<1:04:26, 212.29it/s]

finished frames 5075400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 846034/1666666 [1:27:53<1:05:42, 208.15it/s]

finished frames 5076000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 846121/1666666 [1:27:53<1:04:53, 210.74it/s]

finished frames 5076600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 846231/1666666 [1:27:54<1:03:44, 214.50it/s]

finished frames 5077200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 846341/1666666 [1:27:54<1:03:41, 214.66it/s]

finished frames 5077800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 846430/1666666 [1:27:54<1:03:48, 214.26it/s]

finished frames 5078400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 846539/1666666 [1:27:55<1:05:11, 209.67it/s]

finished frames 5079000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 846627/1666666 [1:27:55<1:03:41, 214.61it/s]

finished frames 5079600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 846737/1666666 [1:27:56<1:02:43, 217.89it/s]

finished frames 5080200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 846828/1666666 [1:27:56<1:02:39, 218.09it/s]

finished frames 5080800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 846939/1666666 [1:27:57<1:02:57, 217.03it/s]

finished frames 5081400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 847026/1666666 [1:27:57<1:05:48, 207.59it/s]

finished frames 5082000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 847134/1666666 [1:27:58<1:04:29, 211.77it/s]

finished frames 5082600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 847222/1666666 [1:27:58<1:04:39, 211.20it/s]

finished frames 5083200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 847332/1666666 [1:27:59<1:03:58, 213.44it/s]

finished frames 5083800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 847442/1666666 [1:27:59<1:03:46, 214.11it/s]

finished frames 5084400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 847530/1666666 [1:28:00<1:04:59, 210.07it/s]

finished frames 5085000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 847640/1666666 [1:28:00<1:04:03, 213.10it/s]

finished frames 5085600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 847728/1666666 [1:28:01<1:03:31, 214.88it/s]

finished frames 5086200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 847838/1666666 [1:28:01<1:03:16, 215.71it/s]

finished frames 5086800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 847926/1666666 [1:28:01<1:03:15, 215.73it/s]

finished frames 5087400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 848036/1666666 [1:28:02<1:04:32, 211.40it/s]

finished frames 5088000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 848124/1666666 [1:28:02<1:03:43, 214.10it/s]

finished frames 5088600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 848234/1666666 [1:28:03<1:03:30, 214.78it/s]

finished frames 5089200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 848344/1666666 [1:28:03<1:03:22, 215.20it/s]

finished frames 5089800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 848432/1666666 [1:28:04<1:03:26, 214.94it/s]

finished frames 5090400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 848542/1666666 [1:28:04<1:03:42, 214.02it/s]

finished frames 5091000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 848630/1666666 [1:28:05<1:03:55, 213.29it/s]

finished frames 5091600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 848740/1666666 [1:28:05<1:03:51, 213.49it/s]

finished frames 5092200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 848828/1666666 [1:28:06<1:03:52, 213.42it/s]

finished frames 5092800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 848938/1666666 [1:28:06<1:03:41, 213.99it/s]

finished frames 5093400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 849025/1666666 [1:28:07<1:06:52, 203.76it/s]

finished frames 5094000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 849133/1666666 [1:28:07<1:04:37, 210.81it/s]

finished frames 5094600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 849243/1666666 [1:28:08<1:07:53, 200.65it/s]

finished frames 5095200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 849330/1666666 [1:28:08<1:07:54, 200.59it/s]

finished frames 5095800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 849439/1666666 [1:28:09<1:04:41, 210.52it/s]

finished frames 5096400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 849527/1666666 [1:28:09<1:04:17, 211.85it/s]

finished frames 5097000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 849637/1666666 [1:28:10<1:04:00, 212.73it/s]

finished frames 5097600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 849725/1666666 [1:28:10<1:04:02, 212.62it/s]

finished frames 5098200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 849835/1666666 [1:28:11<1:03:46, 213.49it/s]

finished frames 5098800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 849923/1666666 [1:28:11<1:04:51, 209.89it/s]

finished frames 5099400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 850028/1666666 [1:28:12<1:06:47, 203.80it/s]

finished frames 5100000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 850134/1666666 [1:28:12<1:05:25, 208.00it/s]

finished frames 5100600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 850239/1666666 [1:28:13<1:05:33, 207.55it/s]

finished frames 5101200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 850324/1666666 [1:28:13<1:05:10, 208.75it/s]

finished frames 5101800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 850431/1666666 [1:28:13<1:04:50, 209.81it/s]

finished frames 5102400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 850540/1666666 [1:28:14<1:04:51, 209.74it/s]

finished frames 5103000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 850626/1666666 [1:28:14<1:04:34, 210.61it/s]

finished frames 5103600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 850735/1666666 [1:28:15<1:04:47, 209.88it/s]

finished frames 5104200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 850842/1666666 [1:28:15<1:04:49, 209.78it/s]

finished frames 5104800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 850926/1666666 [1:28:16<1:05:13, 208.45it/s]

finished frames 5105400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 851031/1666666 [1:28:16<1:06:41, 203.85it/s]

finished frames 5106000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 851137/1666666 [1:28:17<1:05:03, 208.92it/s]

finished frames 5106600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 851225/1666666 [1:28:17<1:03:48, 212.99it/s]

finished frames 5107200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 851335/1666666 [1:28:18<1:03:30, 213.99it/s]

finished frames 5107800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 851423/1666666 [1:28:18<1:03:36, 213.62it/s]

finished frames 5108400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 851533/1666666 [1:28:19<1:03:44, 213.14it/s]

finished frames 5109000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 851643/1666666 [1:28:19<1:03:11, 214.97it/s]

finished frames 5109600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 851731/1666666 [1:28:20<1:03:40, 213.33it/s]

finished frames 5110200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 851841/1666666 [1:28:20<1:03:34, 213.59it/s]

finished frames 5110800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 851929/1666666 [1:28:21<1:03:27, 214.00it/s]

finished frames 5111400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 852039/1666666 [1:28:21<1:04:25, 210.73it/s]

finished frames 5112000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 852127/1666666 [1:28:21<1:03:30, 213.76it/s]

finished frames 5112600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 852237/1666666 [1:28:22<1:03:07, 215.05it/s]

finished frames 5113200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 852325/1666666 [1:28:22<1:03:05, 215.13it/s]

finished frames 5113800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 852435/1666666 [1:28:23<1:03:01, 215.32it/s]

finished frames 5114400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 852523/1666666 [1:28:23<1:03:17, 214.41it/s]

finished frames 5115000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 852633/1666666 [1:28:24<1:03:52, 212.39it/s]

finished frames 5115600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 852742/1666666 [1:28:24<1:04:34, 210.05it/s]

finished frames 5116200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 852829/1666666 [1:28:25<1:04:27, 210.42it/s]

finished frames 5116800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 852939/1666666 [1:28:25<1:04:14, 211.09it/s]

finished frames 5117400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 853025/1666666 [1:28:26<1:05:55, 205.68it/s]

finished frames 5118000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 853133/1666666 [1:28:26<1:04:45, 209.38it/s]

finished frames 5118600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 853241/1666666 [1:28:27<1:04:24, 210.47it/s]

finished frames 5119200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 853329/1666666 [1:28:27<1:04:19, 210.74it/s]

finished frames 5119800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 853439/1666666 [1:28:28<1:04:17, 210.80it/s]

finished frames 5120400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 853527/1666666 [1:28:28<1:04:15, 210.92it/s]

finished frames 5121000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 853637/1666666 [1:28:29<1:04:21, 210.52it/s]

finished frames 5121600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 853725/1666666 [1:28:29<1:04:28, 210.13it/s]

finished frames 5122200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 853835/1666666 [1:28:30<1:04:20, 210.57it/s]

finished frames 5122800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 853941/1666666 [1:28:30<1:04:48, 209.00it/s]

finished frames 5123400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 854025/1666666 [1:28:31<1:18:18, 172.95it/s]

finished frames 5124000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████     | 854130/1666666 [1:28:31<1:07:29, 200.67it/s]

finished frames 5124600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████▏    | 854235/1666666 [1:28:32<1:05:41, 206.14it/s]

finished frames 5125200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████▏    | 854340/1666666 [1:28:32<1:05:25, 206.93it/s]

finished frames 5125800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████▏    | 854424/1666666 [1:28:32<1:05:29, 206.71it/s]

finished frames 5126400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████▏    | 854529/1666666 [1:28:33<1:05:17, 207.30it/s]

finished frames 5127000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████▏    | 854634/1666666 [1:28:34<1:05:10, 207.65it/s]

finished frames 5127600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████▏    | 854739/1666666 [1:28:34<1:05:14, 207.44it/s]

finished frames 5128200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████▏    | 854823/1666666 [1:28:34<1:05:07, 207.77it/s]

finished frames 5128800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████▏    | 854928/1666666 [1:28:35<1:05:09, 207.61it/s]

finished frames 5129400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████▏    | 855033/1666666 [1:28:35<1:06:31, 203.35it/s]

finished frames 5130000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████▏    | 855138/1666666 [1:28:36<1:05:18, 207.11it/s]

finished frames 5130600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████▏    | 855222/1666666 [1:28:36<1:05:21, 206.91it/s]

finished frames 5131200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████▏    | 855327/1666666 [1:28:37<1:05:10, 207.45it/s]

finished frames 5131800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████▏    | 855432/1666666 [1:28:37<1:05:07, 207.59it/s]

finished frames 5132400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████▏    | 855537/1666666 [1:28:38<1:05:18, 207.00it/s]

finished frames 5133000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████▏    | 855642/1666666 [1:28:38<1:04:59, 207.99it/s]

finished frames 5133600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████▏    | 855727/1666666 [1:28:39<1:04:59, 207.93it/s]

finished frames 5134200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████▏    | 855832/1666666 [1:28:39<1:04:54, 208.19it/s]

finished frames 5134800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████▏    | 855938/1666666 [1:28:40<1:04:43, 208.78it/s]

finished frames 5135400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████▏    | 856022/1666666 [1:28:40<1:06:10, 204.17it/s]

finished frames 5136000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████▏    | 856128/1666666 [1:28:41<1:05:02, 207.68it/s]

finished frames 5136600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████▏    | 856233/1666666 [1:28:41<1:10:25, 191.81it/s]

finished frames 5137200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████▏    | 856317/1666666 [1:28:42<1:06:27, 203.22it/s]

finished frames 5137800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████▏    | 856443/1666666 [1:28:42<1:05:56, 204.80it/s]

finished frames 5138400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████▏    | 856527/1666666 [1:28:43<1:05:11, 207.09it/s]

finished frames 5139000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████▏    | 856632/1666666 [1:28:43<1:04:52, 208.09it/s]

finished frames 5139600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████▏    | 856737/1666666 [1:28:44<1:04:38, 208.80it/s]

finished frames 5140200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████▏    | 856842/1666666 [1:28:44<1:04:39, 208.74it/s]

finished frames 5140800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████▏    | 856926/1666666 [1:28:45<1:04:52, 208.01it/s]

finished frames 5141400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████▏    | 857031/1666666 [1:28:45<1:06:16, 203.58it/s]

finished frames 5142000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████▏    | 857137/1666666 [1:28:46<1:05:00, 207.56it/s]

finished frames 5142600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████▏    | 857242/1666666 [1:28:46<1:04:45, 208.30it/s]

finished frames 5143200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████▏    | 857326/1666666 [1:28:47<1:05:04, 207.28it/s]

finished frames 5143800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████▏    | 857431/1666666 [1:28:47<1:04:43, 208.39it/s]

finished frames 5144400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████▏    | 857536/1666666 [1:28:48<1:04:40, 208.50it/s]

finished frames 5145000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████▏    | 857642/1666666 [1:28:48<1:04:43, 208.35it/s]

finished frames 5145600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████▏    | 857726/1666666 [1:28:48<1:04:41, 208.42it/s]

finished frames 5146200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████▏    | 857833/1666666 [1:28:49<1:04:28, 209.06it/s]

finished frames 5146800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████▏    | 857938/1666666 [1:28:50<1:04:49, 207.95it/s]

finished frames 5147400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████▏    | 858022/1666666 [1:28:50<1:06:04, 203.96it/s]

finished frames 5148000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████▏    | 858128/1666666 [1:28:50<1:04:44, 208.14it/s]

finished frames 5148600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 51%|█████▏    | 858234/1666666 [1:28:51<1:04:34, 208.67it/s]

finished frames 5149200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 858340/1666666 [1:28:51<1:04:33, 208.68it/s]

finished frames 5149800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 858424/1666666 [1:28:52<1:04:33, 208.66it/s]

finished frames 5150400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 858529/1666666 [1:28:52<1:04:37, 208.44it/s]

finished frames 5151000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 858636/1666666 [1:28:53<1:05:38, 205.14it/s]

finished frames 5151600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 858742/1666666 [1:28:53<1:06:37, 202.11it/s]

finished frames 5152200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 858826/1666666 [1:28:54<1:05:10, 206.58it/s]

finished frames 5152800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 858932/1666666 [1:28:54<1:04:36, 208.37it/s]

finished frames 5153400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 859037/1666666 [1:28:55<1:06:43, 201.75it/s]

finished frames 5154000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 859142/1666666 [1:28:55<1:05:11, 206.42it/s]

finished frames 5154600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 859226/1666666 [1:28:56<1:04:57, 207.19it/s]

finished frames 5155200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 859331/1666666 [1:28:56<1:05:09, 206.49it/s]

finished frames 5155800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 859436/1666666 [1:28:57<1:04:55, 207.22it/s]

finished frames 5156400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 859541/1666666 [1:28:57<1:04:46, 207.70it/s]

finished frames 5157000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 859625/1666666 [1:28:58<1:04:49, 207.51it/s]

finished frames 5157600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 859730/1666666 [1:28:58<1:04:51, 207.38it/s]

finished frames 5158200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 859836/1666666 [1:28:59<1:04:25, 208.73it/s]

finished frames 5158800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 859941/1666666 [1:28:59<1:04:42, 207.80it/s]

finished frames 5159400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 860025/1666666 [1:29:00<1:06:21, 202.58it/s]

finished frames 5160000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 860132/1666666 [1:29:00<1:04:30, 208.38it/s]

finished frames 5160600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 860242/1666666 [1:29:01<1:03:15, 212.46it/s]

finished frames 5161200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 860330/1666666 [1:29:01<1:03:15, 212.45it/s]

finished frames 5161800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 860440/1666666 [1:29:02<1:03:08, 212.80it/s]

finished frames 5162400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 860528/1666666 [1:29:02<1:03:16, 212.32it/s]

finished frames 5163000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 860638/1666666 [1:29:03<1:03:09, 212.72it/s]

finished frames 5163600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 860726/1666666 [1:29:03<1:03:13, 212.45it/s]

finished frames 5164200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 860836/1666666 [1:29:04<1:02:59, 213.24it/s]

finished frames 5164800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 860924/1666666 [1:29:04<1:06:57, 200.54it/s]

finished frames 5165400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 861032/1666666 [1:29:04<1:05:16, 205.73it/s]

finished frames 5166000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 861141/1666666 [1:29:05<1:03:39, 210.90it/s]

finished frames 5166600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 861229/1666666 [1:29:05<1:03:11, 212.41it/s]

finished frames 5167200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 861339/1666666 [1:29:06<1:03:06, 212.71it/s]

finished frames 5167800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 861427/1666666 [1:29:06<1:03:03, 212.84it/s]

finished frames 5168400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 861537/1666666 [1:29:07<1:02:42, 213.99it/s]

finished frames 5169000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 861625/1666666 [1:29:07<1:02:50, 213.53it/s]

finished frames 5169600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 861735/1666666 [1:29:08<1:02:48, 213.61it/s]

finished frames 5170200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 861823/1666666 [1:29:08<1:02:56, 213.11it/s]

finished frames 5170800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 861933/1666666 [1:29:09<1:02:40, 214.02it/s]

finished frames 5171400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 862021/1666666 [1:29:09<1:04:21, 208.35it/s]

finished frames 5172000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 862131/1666666 [1:29:10<1:02:53, 213.20it/s]

finished frames 5172600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 862241/1666666 [1:29:10<1:02:39, 213.98it/s]

finished frames 5173200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 862329/1666666 [1:29:11<1:02:29, 214.50it/s]

finished frames 5173800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 862439/1666666 [1:29:11<1:02:17, 215.17it/s]

finished frames 5174400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 862527/1666666 [1:29:11<1:02:23, 214.83it/s]

finished frames 5175000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 862637/1666666 [1:29:12<1:02:29, 214.45it/s]

finished frames 5175600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 862725/1666666 [1:29:12<1:02:28, 214.49it/s]

finished frames 5176200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 862835/1666666 [1:29:13<1:02:29, 214.38it/s]

finished frames 5176800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 862923/1666666 [1:29:13<1:02:36, 213.94it/s]

finished frames 5177400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 863033/1666666 [1:29:14<1:03:59, 209.31it/s]

finished frames 5178000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 863143/1666666 [1:29:14<1:02:56, 212.79it/s]

finished frames 5178600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 863231/1666666 [1:29:15<1:02:51, 213.03it/s]

finished frames 5179200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 863341/1666666 [1:29:15<1:05:29, 204.45it/s]

finished frames 5179800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 863428/1666666 [1:29:16<1:11:22, 187.55it/s]

finished frames 5180400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 863537/1666666 [1:29:16<1:04:06, 208.78it/s]

finished frames 5181000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 863625/1666666 [1:29:17<1:03:04, 212.18it/s]

finished frames 5181600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 863735/1666666 [1:29:17<1:02:37, 213.71it/s]

finished frames 5182200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 863823/1666666 [1:29:18<1:02:33, 213.92it/s]

finished frames 5182800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 863933/1666666 [1:29:18<1:02:28, 214.17it/s]

finished frames 5183400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 864021/1666666 [1:29:19<1:04:27, 207.54it/s]

finished frames 5184000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 864131/1666666 [1:29:19<1:03:03, 212.13it/s]

finished frames 5184600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 864241/1666666 [1:29:20<1:02:41, 213.34it/s]

finished frames 5185200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 864329/1666666 [1:29:20<1:02:41, 213.29it/s]

finished frames 5185800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 864439/1666666 [1:29:21<1:02:36, 213.53it/s]

finished frames 5186400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 864527/1666666 [1:29:21<1:02:35, 213.60it/s]

finished frames 5187000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 864637/1666666 [1:29:21<1:02:41, 213.21it/s]

finished frames 5187600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 864725/1666666 [1:29:22<1:02:33, 213.65it/s]

finished frames 5188200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 864835/1666666 [1:29:22<1:02:42, 213.13it/s]

finished frames 5188800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 864923/1666666 [1:29:23<1:02:33, 213.63it/s]

finished frames 5189400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 865032/1666666 [1:29:23<1:04:32, 206.98it/s]

finished frames 5190000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 865140/1666666 [1:29:24<1:03:48, 209.36it/s]

finished frames 5190600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 865226/1666666 [1:29:24<1:03:48, 209.34it/s]

finished frames 5191200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 865333/1666666 [1:29:25<1:03:39, 209.78it/s]

finished frames 5191800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 865441/1666666 [1:29:25<1:03:12, 211.24it/s]

finished frames 5192400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 865528/1666666 [1:29:26<1:03:21, 210.75it/s]

finished frames 5193000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 865638/1666666 [1:29:26<1:03:20, 210.79it/s]

finished frames 5193600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 865726/1666666 [1:29:27<1:05:11, 204.75it/s]

finished frames 5194200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 865834/1666666 [1:29:27<1:06:27, 200.82it/s]

finished frames 5194800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 865939/1666666 [1:29:28<1:04:28, 206.97it/s]

finished frames 5195400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 866023/1666666 [1:29:28<1:05:59, 202.22it/s]

finished frames 5196000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 866128/1666666 [1:29:29<1:04:33, 206.67it/s]

finished frames 5196600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 866233/1666666 [1:29:29<1:04:08, 207.99it/s]

finished frames 5197200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 866338/1666666 [1:29:30<1:04:20, 207.33it/s]

finished frames 5197800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 866422/1666666 [1:29:30<1:04:17, 207.45it/s]

finished frames 5198400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 866528/1666666 [1:29:31<1:04:10, 207.78it/s]

finished frames 5199000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 866633/1666666 [1:29:31<1:04:06, 208.01it/s]

finished frames 5199600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 866738/1666666 [1:29:32<1:04:08, 207.87it/s]

finished frames 5200200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 866843/1666666 [1:29:32<1:03:56, 208.48it/s]

finished frames 5200800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 866928/1666666 [1:29:33<1:03:57, 208.41it/s]

finished frames 5201400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 867033/1666666 [1:29:33<1:05:17, 204.14it/s]

finished frames 5202000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 867138/1666666 [1:29:34<1:04:15, 207.37it/s]

finished frames 5202600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 867222/1666666 [1:29:34<1:04:12, 207.51it/s]

finished frames 5203200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 867327/1666666 [1:29:34<1:03:57, 208.29it/s]

finished frames 5203800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 867432/1666666 [1:29:35<1:03:56, 208.33it/s]

finished frames 5204400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 867537/1666666 [1:29:35<1:04:04, 207.86it/s]

finished frames 5205000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 867642/1666666 [1:29:36<1:03:51, 208.52it/s]

finished frames 5205600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 867727/1666666 [1:29:36<1:03:55, 208.31it/s]

finished frames 5206200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 867833/1666666 [1:29:37<1:03:53, 208.39it/s]

finished frames 5206800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 867938/1666666 [1:29:37<1:04:04, 207.76it/s]

finished frames 5207400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 868022/1666666 [1:29:38<1:08:36, 194.03it/s]

finished frames 5208000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 868127/1666666 [1:29:38<1:05:01, 204.67it/s]

finished frames 5208600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 868232/1666666 [1:29:39<1:04:24, 206.61it/s]

finished frames 5209200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 868337/1666666 [1:29:39<1:04:18, 206.90it/s]

finished frames 5209800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 868442/1666666 [1:29:40<1:04:00, 207.83it/s]

finished frames 5210400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 868526/1666666 [1:29:40<1:04:05, 207.53it/s]

finished frames 5211000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 868631/1666666 [1:29:41<1:03:54, 208.11it/s]

finished frames 5211600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 868738/1666666 [1:29:41<1:03:16, 210.16it/s]

finished frames 5212200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 868826/1666666 [1:29:42<1:02:45, 211.88it/s]

finished frames 5212800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 868936/1666666 [1:29:42<1:02:25, 212.98it/s]

finished frames 5213400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 869024/1666666 [1:29:43<1:03:42, 208.66it/s]

finished frames 5214000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 869133/1666666 [1:29:43<1:02:47, 211.69it/s]

finished frames 5214600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 869243/1666666 [1:29:44<1:02:26, 212.85it/s]

finished frames 5215200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 869331/1666666 [1:29:44<1:02:25, 212.89it/s]

finished frames 5215800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 869441/1666666 [1:29:45<1:02:28, 212.68it/s]

finished frames 5216400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 869529/1666666 [1:29:45<1:02:29, 212.61it/s]

finished frames 5217000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 869639/1666666 [1:29:46<1:02:15, 213.35it/s]

finished frames 5217600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 869727/1666666 [1:29:46<1:02:17, 213.22it/s]

finished frames 5218200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 869837/1666666 [1:29:46<1:02:16, 213.24it/s]

finished frames 5218800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 869924/1666666 [1:29:47<1:03:15, 209.90it/s]

finished frames 5219400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 870034/1666666 [1:29:47<1:04:39, 205.36it/s]

finished frames 5220000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 870143/1666666 [1:29:48<1:02:30, 212.39it/s]

finished frames 5220600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 870231/1666666 [1:29:48<1:02:15, 213.20it/s]

finished frames 5221200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 870319/1666666 [1:29:49<1:02:07, 213.67it/s]

finished frames 5221800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 870428/1666666 [1:29:49<1:03:31, 208.91it/s]

finished frames 5222400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 870538/1666666 [1:29:50<1:02:24, 212.61it/s]

finished frames 5223000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 870626/1666666 [1:29:50<1:02:19, 212.85it/s]

finished frames 5223600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 870736/1666666 [1:29:51<1:02:14, 213.13it/s]

finished frames 5224200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 870824/1666666 [1:29:51<1:02:06, 213.54it/s]

finished frames 5224800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 870934/1666666 [1:29:52<1:02:56, 210.71it/s]

finished frames 5225400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 871021/1666666 [1:29:52<1:05:37, 202.08it/s]

finished frames 5226000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 871126/1666666 [1:29:53<1:04:19, 206.11it/s]

finished frames 5226600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 871231/1666666 [1:29:53<1:08:51, 192.55it/s]

finished frames 5227200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 871340/1666666 [1:29:54<1:02:14, 212.99it/s]

finished frames 5227800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 871428/1666666 [1:29:54<1:02:04, 213.51it/s]

finished frames 5228400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 871538/1666666 [1:29:55<1:02:38, 211.56it/s]

finished frames 5229000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 871624/1666666 [1:29:55<1:03:13, 209.57it/s]

finished frames 5229600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 871734/1666666 [1:29:56<1:01:22, 215.85it/s]

finished frames 5230200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 871844/1666666 [1:29:56<1:01:06, 216.79it/s]

finished frames 5230800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 871932/1666666 [1:29:56<1:01:52, 214.09it/s]

finished frames 5231400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 872020/1666666 [1:29:57<1:03:10, 209.62it/s]

finished frames 5232000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 872126/1666666 [1:29:57<1:03:10, 209.60it/s]

finished frames 5232600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 872235/1666666 [1:29:58<1:02:19, 212.42it/s]

finished frames 5233200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 872323/1666666 [1:29:58<1:03:32, 208.34it/s]

finished frames 5233800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 872428/1666666 [1:29:59<1:03:42, 207.79it/s]

finished frames 5234400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 872533/1666666 [1:29:59<1:03:28, 208.52it/s]

finished frames 5235000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 872639/1666666 [1:30:00<1:05:15, 202.78it/s]

finished frames 5235600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 872723/1666666 [1:30:00<1:04:09, 206.25it/s]

finished frames 5236200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 872828/1666666 [1:30:01<1:08:59, 191.78it/s]

finished frames 5236800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 872935/1666666 [1:30:01<1:04:10, 206.12it/s]

finished frames 5237400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 873020/1666666 [1:30:02<1:06:14, 199.71it/s]

finished frames 5238000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 873125/1666666 [1:30:02<1:04:26, 205.22it/s]

finished frames 5238600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 873234/1666666 [1:30:03<1:03:31, 208.16it/s]

finished frames 5239200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 873339/1666666 [1:30:03<1:03:30, 208.20it/s]

finished frames 5239800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 873423/1666666 [1:30:04<1:03:39, 207.68it/s]

finished frames 5240400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 873528/1666666 [1:30:04<1:03:26, 208.39it/s]

finished frames 5241000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 873633/1666666 [1:30:05<1:03:40, 207.60it/s]

finished frames 5241600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 873738/1666666 [1:30:05<1:03:40, 207.53it/s]

finished frames 5242200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 873822/1666666 [1:30:06<1:03:48, 207.10it/s]

finished frames 5242800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 873927/1666666 [1:30:06<1:03:40, 207.49it/s]

finished frames 5243400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 874032/1666666 [1:30:07<1:05:06, 202.92it/s]

finished frames 5244000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 874137/1666666 [1:30:07<1:03:41, 207.39it/s]

finished frames 5244600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 874242/1666666 [1:30:08<1:03:29, 208.00it/s]

finished frames 5245200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 874326/1666666 [1:30:08<1:03:30, 207.92it/s]

finished frames 5245800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 874431/1666666 [1:30:09<1:03:29, 207.96it/s]

finished frames 5246400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 874536/1666666 [1:30:09<1:03:25, 208.17it/s]

finished frames 5247000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 874641/1666666 [1:30:10<1:03:25, 208.11it/s]

finished frames 5247600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 874725/1666666 [1:30:10<1:03:33, 207.67it/s]

finished frames 5248200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 874830/1666666 [1:30:10<1:03:29, 207.87it/s]

finished frames 5248800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 52%|█████▏    | 874935/1666666 [1:30:11<1:03:27, 207.94it/s]

finished frames 5249400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 875019/1666666 [1:30:11<1:05:30, 201.42it/s]

finished frames 5250000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 875127/1666666 [1:30:12<1:04:19, 205.10it/s]

finished frames 5250600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 875234/1666666 [1:30:12<1:03:56, 206.30it/s]

finished frames 5251200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 875342/1666666 [1:30:13<1:02:59, 209.36it/s]

finished frames 5251800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 875428/1666666 [1:30:13<1:02:50, 209.83it/s]

finished frames 5252400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 875535/1666666 [1:30:14<1:02:50, 209.84it/s]

finished frames 5253000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 875643/1666666 [1:30:14<1:02:36, 210.56it/s]

finished frames 5253600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 875731/1666666 [1:30:15<1:02:37, 210.51it/s]

finished frames 5254200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 875841/1666666 [1:30:15<1:02:33, 210.68it/s]

finished frames 5254800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 875929/1666666 [1:30:16<1:02:33, 210.67it/s]

finished frames 5255400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 876039/1666666 [1:30:16<1:03:42, 206.82it/s]

finished frames 5256000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 876125/1666666 [1:30:17<1:03:08, 208.68it/s]

finished frames 5256600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 876231/1666666 [1:30:17<1:02:58, 209.21it/s]

finished frames 5257200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 876339/1666666 [1:30:18<1:02:49, 209.69it/s]

finished frames 5257800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 876424/1666666 [1:30:18<1:02:58, 209.12it/s]

finished frames 5258400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 876531/1666666 [1:30:19<1:02:45, 209.83it/s]

finished frames 5259000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 876639/1666666 [1:30:19<1:02:36, 210.33it/s]

finished frames 5259600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 876725/1666666 [1:30:20<1:02:52, 209.40it/s]

finished frames 5260200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 876833/1666666 [1:30:20<1:02:42, 209.93it/s]

finished frames 5260800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 876941/1666666 [1:30:21<1:02:40, 210.01it/s]

finished frames 5261400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 877026/1666666 [1:30:21<1:04:13, 204.91it/s]

finished frames 5262000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 877132/1666666 [1:30:22<1:03:07, 208.46it/s]

finished frames 5262600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 877239/1666666 [1:30:22<1:02:48, 209.49it/s]

finished frames 5263200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 877324/1666666 [1:30:22<1:02:54, 209.15it/s]

finished frames 5263800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 877431/1666666 [1:30:23<1:05:10, 201.81it/s]

finished frames 5264400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 877539/1666666 [1:30:24<1:03:06, 208.40it/s]

finished frames 5265000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 877626/1666666 [1:30:24<1:02:35, 210.08it/s]

finished frames 5265600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 877736/1666666 [1:30:24<1:02:16, 211.12it/s]

finished frames 5266200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 877824/1666666 [1:30:25<1:02:13, 211.26it/s]

finished frames 5266800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 877934/1666666 [1:30:25<1:02:03, 211.82it/s]

finished frames 5267400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 878022/1666666 [1:30:26<1:03:53, 205.74it/s]

finished frames 5268000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 878132/1666666 [1:30:26<1:02:12, 211.28it/s]

finished frames 5268600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 878242/1666666 [1:30:27<1:01:52, 212.40it/s]

finished frames 5269200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 878330/1666666 [1:30:27<1:01:32, 213.51it/s]

finished frames 5269800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 878440/1666666 [1:30:28<1:01:07, 214.95it/s]

finished frames 5270400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 878528/1666666 [1:30:28<1:01:23, 213.95it/s]

finished frames 5271000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 878638/1666666 [1:30:29<1:01:22, 213.99it/s]

finished frames 5271600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 878726/1666666 [1:30:29<1:01:17, 214.24it/s]

finished frames 5272200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 878836/1666666 [1:30:30<1:01:31, 213.44it/s]

finished frames 5272800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 878923/1666666 [1:30:30<1:03:16, 207.47it/s]

finished frames 5273400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 879029/1666666 [1:30:31<1:04:16, 204.24it/s]

finished frames 5274000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 879138/1666666 [1:30:31<1:02:10, 211.11it/s]

finished frames 5274600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 879226/1666666 [1:30:32<1:01:39, 212.84it/s]

finished frames 5275200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 879336/1666666 [1:30:32<1:01:34, 213.13it/s]

finished frames 5275800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 879424/1666666 [1:30:32<1:01:30, 213.30it/s]

finished frames 5276400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 879534/1666666 [1:30:33<1:01:23, 213.68it/s]

finished frames 5277000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 879644/1666666 [1:30:33<1:01:23, 213.68it/s]

finished frames 5277600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 879732/1666666 [1:30:34<1:01:31, 213.17it/s]

finished frames 5278200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 879842/1666666 [1:30:34<1:03:16, 207.25it/s]

finished frames 5278800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 879929/1666666 [1:30:35<1:04:30, 203.28it/s]

finished frames 5279400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 880039/1666666 [1:30:35<1:03:23, 206.82it/s]

finished frames 5280000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 880126/1666666 [1:30:36<1:02:11, 210.78it/s]

finished frames 5280600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 880236/1666666 [1:30:36<1:01:41, 212.46it/s]

finished frames 5281200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 880324/1666666 [1:30:37<1:01:44, 212.24it/s]

finished frames 5281800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 880434/1666666 [1:30:37<1:01:36, 212.69it/s]

finished frames 5282400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 880544/1666666 [1:30:38<1:01:28, 213.12it/s]

finished frames 5283000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 880632/1666666 [1:30:38<1:01:38, 212.54it/s]

finished frames 5283600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 880742/1666666 [1:30:39<1:01:37, 212.57it/s]

finished frames 5284200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 880830/1666666 [1:30:39<1:01:42, 212.22it/s]

finished frames 5284800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 880940/1666666 [1:30:40<1:01:22, 213.39it/s]

finished frames 5285400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 881028/1666666 [1:30:40<1:02:43, 208.74it/s]

finished frames 5286000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 881137/1666666 [1:30:41<1:01:57, 211.29it/s]

finished frames 5286600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 881225/1666666 [1:30:41<1:01:49, 211.73it/s]

finished frames 5287200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 881335/1666666 [1:30:42<1:01:39, 212.26it/s]

finished frames 5287800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 881423/1666666 [1:30:42<1:01:48, 211.74it/s]

finished frames 5288400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 881533/1666666 [1:30:42<1:01:41, 212.09it/s]

finished frames 5289000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 881643/1666666 [1:30:43<1:01:45, 211.85it/s]

finished frames 5289600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 881731/1666666 [1:30:43<1:01:40, 212.14it/s]

finished frames 5290200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 881841/1666666 [1:30:44<1:01:46, 211.77it/s]

finished frames 5290800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 881929/1666666 [1:30:44<1:01:32, 212.51it/s]

finished frames 5291400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 882038/1666666 [1:30:45<1:03:07, 207.18it/s]

finished frames 5292000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 882124/1666666 [1:30:45<1:02:26, 209.39it/s]

finished frames 5292600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 882232/1666666 [1:30:46<1:03:24, 206.21it/s]

finished frames 5293200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 882340/1666666 [1:30:46<1:03:33, 205.66it/s]

finished frames 5293800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 882426/1666666 [1:30:47<1:02:40, 208.56it/s]

finished frames 5294400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 882534/1666666 [1:30:47<1:02:20, 209.62it/s]

finished frames 5295000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 882642/1666666 [1:30:48<1:02:10, 210.16it/s]

finished frames 5295600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 882730/1666666 [1:30:48<1:01:57, 210.86it/s]

finished frames 5296200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 882837/1666666 [1:30:49<1:02:29, 209.04it/s]

finished frames 5296800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 882942/1666666 [1:30:49<1:02:53, 207.70it/s]

finished frames 5297400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 883026/1666666 [1:30:50<1:04:29, 202.49it/s]

finished frames 5298000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 883131/1666666 [1:30:50<1:03:18, 206.30it/s]

finished frames 5298600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 883236/1666666 [1:30:51<1:03:04, 207.03it/s]

finished frames 5299200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 883341/1666666 [1:30:51<1:03:06, 206.88it/s]

finished frames 5299800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 883425/1666666 [1:30:52<1:03:04, 206.95it/s]

finished frames 5300400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 883530/1666666 [1:30:52<1:02:57, 207.34it/s]

finished frames 5301000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 883635/1666666 [1:30:53<1:03:02, 207.03it/s]

finished frames 5301600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 883740/1666666 [1:30:53<1:02:53, 207.48it/s]

finished frames 5302200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 883824/1666666 [1:30:54<1:03:08, 206.61it/s]

finished frames 5302800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 883929/1666666 [1:30:54<1:03:08, 206.63it/s]

finished frames 5303400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 884034/1666666 [1:30:55<1:04:17, 202.88it/s]

finished frames 5304000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 884141/1666666 [1:30:55<1:02:19, 209.26it/s]

finished frames 5304600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 884229/1666666 [1:30:55<1:01:26, 212.22it/s]

finished frames 5305200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 884339/1666666 [1:30:56<1:00:57, 213.87it/s]

finished frames 5305800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 884427/1666666 [1:30:56<1:01:07, 213.31it/s]

finished frames 5306400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 884537/1666666 [1:30:57<1:04:32, 201.96it/s]

finished frames 5307000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 884623/1666666 [1:30:57<1:04:33, 201.87it/s]

finished frames 5307600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 884733/1666666 [1:30:58<1:01:24, 212.25it/s]

finished frames 5308200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 884843/1666666 [1:30:58<1:00:36, 215.00it/s]

finished frames 5308800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 884931/1666666 [1:30:59<1:00:41, 214.66it/s]

finished frames 5309400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 885041/1666666 [1:30:59<1:01:31, 211.75it/s]

finished frames 5310000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 885129/1666666 [1:31:00<1:01:00, 213.48it/s]

finished frames 5310600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 885239/1666666 [1:31:00<1:00:56, 213.69it/s]

finished frames 5311200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 885327/1666666 [1:31:01<1:00:50, 214.06it/s]

finished frames 5311800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 885437/1666666 [1:31:01<1:00:43, 214.43it/s]

finished frames 5312400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 885525/1666666 [1:31:02<1:00:39, 214.64it/s]

finished frames 5313000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 885635/1666666 [1:31:02<1:00:55, 213.65it/s]

finished frames 5313600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 885723/1666666 [1:31:03<1:00:54, 213.71it/s]

finished frames 5314200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 885833/1666666 [1:31:03<1:00:44, 214.23it/s]

finished frames 5314800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 885943/1666666 [1:31:04<1:00:36, 214.66it/s]

finished frames 5315400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 886031/1666666 [1:31:04<1:02:16, 208.93it/s]

finished frames 5316000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 886141/1666666 [1:31:05<1:00:52, 213.70it/s]

finished frames 5316600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 886229/1666666 [1:31:05<1:00:56, 213.43it/s]

finished frames 5317200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 886339/1666666 [1:31:05<1:00:56, 213.40it/s]

finished frames 5317800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 886427/1666666 [1:31:06<1:00:53, 213.57it/s]

finished frames 5318400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 886537/1666666 [1:31:06<1:00:54, 213.50it/s]

finished frames 5319000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 886625/1666666 [1:31:07<1:00:51, 213.60it/s]

finished frames 5319600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 886735/1666666 [1:31:07<1:00:48, 213.79it/s]

finished frames 5320200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 886823/1666666 [1:31:08<1:00:47, 213.78it/s]

finished frames 5320800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 886932/1666666 [1:31:08<1:01:53, 209.95it/s]

finished frames 5321400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 887041/1666666 [1:31:09<1:04:12, 202.36it/s]

finished frames 5322000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 887128/1666666 [1:31:09<1:01:32, 211.10it/s]

finished frames 5322600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 887238/1666666 [1:31:10<1:00:58, 213.05it/s]

finished frames 5323200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 887326/1666666 [1:31:10<1:00:55, 213.19it/s]

finished frames 5323800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 887436/1666666 [1:31:11<1:00:51, 213.39it/s]

finished frames 5324400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 887524/1666666 [1:31:11<1:00:39, 214.08it/s]

finished frames 5325000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 887634/1666666 [1:31:12<1:00:39, 214.03it/s]

finished frames 5325600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 887744/1666666 [1:31:12<1:00:41, 213.88it/s]

finished frames 5326200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 887832/1666666 [1:31:13<1:01:23, 211.41it/s]

finished frames 5326800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 887942/1666666 [1:31:13<1:01:43, 210.28it/s]

finished frames 5327400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 888027/1666666 [1:31:13<1:03:21, 204.80it/s]

finished frames 5328000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 888135/1666666 [1:31:14<1:01:40, 210.37it/s]

finished frames 5328600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 888223/1666666 [1:31:14<1:01:24, 211.28it/s]

finished frames 5329200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 888333/1666666 [1:31:15<1:01:07, 212.24it/s]

finished frames 5329800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 888443/1666666 [1:31:15<1:01:14, 211.78it/s]

finished frames 5330400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 888531/1666666 [1:31:16<1:01:01, 212.52it/s]

finished frames 5331000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 888641/1666666 [1:31:16<1:01:15, 211.70it/s]

finished frames 5331600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 888729/1666666 [1:31:17<1:00:58, 212.63it/s]

finished frames 5332200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 888839/1666666 [1:31:17<1:00:54, 212.84it/s]

finished frames 5332800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 888927/1666666 [1:31:18<1:00:53, 212.85it/s]

finished frames 5333400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 889037/1666666 [1:31:18<1:02:22, 207.78it/s]

finished frames 5334000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 889125/1666666 [1:31:19<1:00:56, 212.64it/s]

finished frames 5334600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 889235/1666666 [1:31:19<1:05:53, 196.65it/s]

finished frames 5335200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 889344/1666666 [1:31:20<1:01:08, 211.92it/s]

finished frames 5335800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 889432/1666666 [1:31:20<1:00:36, 213.74it/s]

finished frames 5336400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 889542/1666666 [1:31:21<1:00:24, 214.43it/s]

finished frames 5337000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 889630/1666666 [1:31:21<1:00:18, 214.72it/s]

finished frames 5337600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 889740/1666666 [1:31:22<1:00:23, 214.40it/s]

finished frames 5338200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 889828/1666666 [1:31:22<1:00:21, 214.53it/s]

finished frames 5338800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 889938/1666666 [1:31:23<1:00:28, 214.08it/s]

finished frames 5339400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 890026/1666666 [1:31:23<1:01:49, 209.39it/s]

finished frames 5340000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 890136/1666666 [1:31:23<1:00:15, 214.78it/s]

finished frames 5340600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 890224/1666666 [1:31:24<1:00:12, 214.92it/s]

finished frames 5341200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 890334/1666666 [1:31:24<1:00:10, 215.02it/s]

finished frames 5341800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 890422/1666666 [1:31:25<1:00:04, 215.36it/s]

finished frames 5342400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 890531/1666666 [1:31:25<1:01:41, 209.70it/s]

finished frames 5343000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 890636/1666666 [1:31:26<1:01:56, 208.79it/s]

finished frames 5343600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 890741/1666666 [1:31:26<1:02:32, 206.80it/s]

finished frames 5344200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 890825/1666666 [1:31:27<1:02:31, 206.83it/s]

finished frames 5344800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 890930/1666666 [1:31:27<1:02:33, 206.68it/s]

finished frames 5345400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 891035/1666666 [1:31:28<1:03:26, 203.78it/s]

finished frames 5346000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 891140/1666666 [1:31:28<1:02:15, 207.60it/s]

finished frames 5346600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 891226/1666666 [1:31:29<1:01:20, 210.70it/s]

finished frames 5347200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 891336/1666666 [1:31:29<1:00:41, 212.89it/s]

finished frames 5347800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 891424/1666666 [1:31:30<1:00:31, 213.45it/s]

finished frames 5348400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 891534/1666666 [1:31:30<1:00:29, 213.56it/s]

finished frames 5349000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 53%|█████▎    | 891621/1666666 [1:31:31<1:07:12, 192.19it/s]

finished frames 5349600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▎    | 891729/1666666 [1:31:31<1:12:54, 177.16it/s]

finished frames 5350200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▎    | 891837/1666666 [1:31:32<1:03:11, 204.34it/s]

finished frames 5350800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▎    | 891942/1666666 [1:31:32<1:02:18, 207.22it/s]

finished frames 5351400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▎    | 892026/1666666 [1:31:33<1:03:36, 202.97it/s]

finished frames 5352000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▎    | 892131/1666666 [1:31:33<1:02:31, 206.44it/s]

finished frames 5352600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▎    | 892236/1666666 [1:31:34<1:02:08, 207.72it/s]

finished frames 5353200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▎    | 892341/1666666 [1:31:34<1:02:10, 207.54it/s]

finished frames 5353800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▎    | 892425/1666666 [1:31:34<1:02:15, 207.28it/s]

finished frames 5354400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▎    | 892530/1666666 [1:31:35<1:02:20, 206.96it/s]

finished frames 5355000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▎    | 892636/1666666 [1:31:36<1:01:59, 208.09it/s]

finished frames 5355600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▎    | 892741/1666666 [1:31:36<1:01:56, 208.23it/s]

finished frames 5356200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▎    | 892825/1666666 [1:31:36<1:01:55, 208.30it/s]

finished frames 5356800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▎    | 892930/1666666 [1:31:37<1:01:58, 208.10it/s]

finished frames 5357400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▎    | 893035/1666666 [1:31:37<1:03:22, 203.46it/s]

finished frames 5358000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▎    | 893140/1666666 [1:31:38<1:02:10, 207.37it/s]

finished frames 5358600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▎    | 893225/1666666 [1:31:38<1:01:52, 208.34it/s]

finished frames 5359200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▎    | 893330/1666666 [1:31:39<1:01:46, 208.65it/s]

finished frames 5359800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▎    | 893436/1666666 [1:31:39<1:01:39, 209.04it/s]

finished frames 5360400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▎    | 893542/1666666 [1:31:40<1:01:35, 209.23it/s]

finished frames 5361000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▎    | 893628/1666666 [1:31:40<1:00:45, 212.08it/s]

finished frames 5361600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▎    | 893738/1666666 [1:31:41<1:00:24, 213.23it/s]

finished frames 5362200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▎    | 893826/1666666 [1:31:41<1:00:35, 212.57it/s]

finished frames 5362800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▎    | 893914/1666666 [1:31:42<1:00:37, 212.42it/s]

finished frames 5363400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▎    | 894023/1666666 [1:31:42<1:03:57, 201.34it/s]

finished frames 5364000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▎    | 894132/1666666 [1:31:43<1:01:13, 210.30it/s]

finished frames 5364600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▎    | 894242/1666666 [1:31:43<1:00:49, 211.67it/s]

finished frames 5365200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▎    | 894330/1666666 [1:31:44<1:00:39, 212.23it/s]

finished frames 5365800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▎    | 894440/1666666 [1:31:44<1:00:39, 212.19it/s]

finished frames 5366400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▎    | 894528/1666666 [1:31:45<1:00:33, 212.52it/s]

finished frames 5367000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▎    | 894638/1666666 [1:31:45<1:00:27, 212.83it/s]

finished frames 5367600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▎    | 894726/1666666 [1:31:46<1:00:33, 212.47it/s]

finished frames 5368200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▎    | 894836/1666666 [1:31:46<1:00:21, 213.13it/s]

finished frames 5368800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▎    | 894924/1666666 [1:31:46<1:00:32, 212.48it/s]

finished frames 5369400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▎    | 895034/1666666 [1:31:47<1:01:49, 207.99it/s]

finished frames 5370000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▎    | 895122/1666666 [1:31:47<1:00:51, 211.27it/s]

finished frames 5370600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▎    | 895232/1666666 [1:31:48<1:00:28, 212.61it/s]

finished frames 5371200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▎    | 895342/1666666 [1:31:48<1:00:26, 212.68it/s]

finished frames 5371800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▎    | 895430/1666666 [1:31:49<1:00:23, 212.83it/s]

finished frames 5372400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▎    | 895540/1666666 [1:31:49<1:00:19, 213.05it/s]

finished frames 5373000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▎    | 895628/1666666 [1:31:50<1:00:25, 212.67it/s]

finished frames 5373600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▎    | 895738/1666666 [1:31:50<1:00:00, 214.12it/s]

finished frames 5374200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▎    | 895826/1666666 [1:31:51<59:45, 215.01it/s]  

finished frames 5374800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 895936/1666666 [1:31:51<59:44, 214.99it/s]  

finished frames 5375400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 896024/1666666 [1:31:52<1:01:05, 210.25it/s]

finished frames 5376000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 896134/1666666 [1:31:52<59:44, 214.99it/s]  

finished frames 5376600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 896244/1666666 [1:31:53<59:27, 215.95it/s]

finished frames 5377200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 896332/1666666 [1:31:53<59:50, 214.53it/s]

finished frames 5377800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 896442/1666666 [1:31:54<1:00:15, 213.04it/s]

finished frames 5378400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 896529/1666666 [1:31:54<1:01:16, 209.48it/s]

finished frames 5379000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 896639/1666666 [1:31:55<1:01:01, 210.33it/s]

finished frames 5379600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 896725/1666666 [1:31:55<1:01:26, 208.85it/s]

finished frames 5380200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 896834/1666666 [1:31:55<1:00:41, 211.41it/s]

finished frames 5380800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 896922/1666666 [1:31:56<1:00:15, 212.91it/s]

finished frames 5381400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 897032/1666666 [1:31:56<1:01:09, 209.73it/s]

finished frames 5382000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 897140/1666666 [1:31:57<1:00:08, 213.26it/s]

finished frames 5382600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 897227/1666666 [1:31:57<1:01:17, 209.24it/s]

finished frames 5383200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 897333/1666666 [1:31:58<1:01:18, 209.15it/s]

finished frames 5383800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 897443/1666666 [1:31:58<1:00:56, 210.39it/s]

finished frames 5384400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 897531/1666666 [1:31:59<1:00:16, 212.69it/s]

finished frames 5385000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 897641/1666666 [1:31:59<59:53, 213.99it/s]  

finished frames 5385600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 897729/1666666 [1:32:00<59:39, 214.83it/s]  

finished frames 5386200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 897839/1666666 [1:32:00<1:00:24, 212.14it/s]

finished frames 5386800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 897927/1666666 [1:32:01<59:42, 214.58it/s]  

finished frames 5387400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 898037/1666666 [1:32:01<1:00:40, 211.12it/s]

finished frames 5388000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 898125/1666666 [1:32:02<59:58, 213.59it/s]  

finished frames 5388600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 898235/1666666 [1:32:02<59:28, 215.35it/s]  

finished frames 5389200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 898323/1666666 [1:32:02<59:44, 214.33it/s]

finished frames 5389800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 898433/1666666 [1:32:03<59:55, 213.66it/s]  

finished frames 5390400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 898543/1666666 [1:32:04<59:44, 214.32it/s]  

finished frames 5391000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 898631/1666666 [1:32:04<59:26, 215.34it/s]

finished frames 5391600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 898719/1666666 [1:32:04<1:04:45, 197.65it/s]

finished frames 5392200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 898829/1666666 [1:32:05<1:00:17, 212.27it/s]

finished frames 5392800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 898939/1666666 [1:32:05<59:31, 214.96it/s]  

finished frames 5393400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 899027/1666666 [1:32:06<1:00:34, 211.18it/s]

finished frames 5394000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 899137/1666666 [1:32:06<59:45, 214.04it/s]  

finished frames 5394600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 899225/1666666 [1:32:07<59:36, 214.60it/s]

finished frames 5395200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 899335/1666666 [1:32:07<59:24, 215.27it/s]

finished frames 5395800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 899423/1666666 [1:32:08<59:36, 214.55it/s]

finished frames 5396400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 899533/1666666 [1:32:08<59:22, 215.33it/s]

finished frames 5397000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 899643/1666666 [1:32:09<59:14, 215.80it/s]

finished frames 5397600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 899731/1666666 [1:32:09<59:17, 215.61it/s]

finished frames 5398200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 899841/1666666 [1:32:10<59:09, 216.03it/s]

finished frames 5398800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 899929/1666666 [1:32:10<59:19, 215.41it/s]

finished frames 5399400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 900039/1666666 [1:32:11<1:01:03, 209.27it/s]

finished frames 5400000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 900127/1666666 [1:32:11<1:00:11, 212.26it/s]

finished frames 5400600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 900237/1666666 [1:32:11<59:49, 213.50it/s]  

finished frames 5401200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 900325/1666666 [1:32:12<59:46, 213.66it/s]  

finished frames 5401800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 900435/1666666 [1:32:12<59:51, 213.36it/s]  

finished frames 5402400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 900523/1666666 [1:32:13<59:57, 212.97it/s]  

finished frames 5403000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 900633/1666666 [1:32:13<59:41, 213.89it/s]  

finished frames 5403600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 900743/1666666 [1:32:14<59:46, 213.54it/s]  

finished frames 5404200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 900831/1666666 [1:32:14<59:59, 212.74it/s]  

finished frames 5404800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 900941/1666666 [1:32:15<59:59, 212.74it/s]  

finished frames 5405400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 901029/1666666 [1:32:15<1:01:06, 208.80it/s]

finished frames 5406000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 901116/1666666 [1:32:16<1:10:40, 180.51it/s]

finished frames 5406600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 901225/1666666 [1:32:16<1:06:43, 191.21it/s]

finished frames 5407200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 901335/1666666 [1:32:17<1:00:49, 209.71it/s]

finished frames 5407800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 901423/1666666 [1:32:17<59:58, 212.64it/s]  

finished frames 5408400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 901533/1666666 [1:32:18<59:40, 213.68it/s]  

finished frames 5409000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 901643/1666666 [1:32:18<59:31, 214.20it/s]

finished frames 5409600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 901731/1666666 [1:32:19<59:30, 214.27it/s]

finished frames 5410200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 901841/1666666 [1:32:19<59:29, 214.26it/s]

finished frames 5410800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 901929/1666666 [1:32:20<59:29, 214.26it/s]

finished frames 5411400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 902039/1666666 [1:32:20<1:00:56, 209.14it/s]

finished frames 5412000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 902127/1666666 [1:32:20<59:50, 212.93it/s]  

finished frames 5412600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 902237/1666666 [1:32:21<59:33, 213.94it/s]

finished frames 5413200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 902325/1666666 [1:32:21<59:26, 214.31it/s]

finished frames 5413800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 902435/1666666 [1:32:22<59:26, 214.26it/s]

finished frames 5414400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 902523/1666666 [1:32:22<59:23, 214.42it/s]

finished frames 5415000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 902633/1666666 [1:32:23<59:27, 214.18it/s]

finished frames 5415600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 902743/1666666 [1:32:23<59:23, 214.36it/s]

finished frames 5416200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 902831/1666666 [1:32:24<59:24, 214.30it/s]

finished frames 5416800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 902941/1666666 [1:32:24<59:23, 214.34it/s]

finished frames 5417400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 903029/1666666 [1:32:25<1:00:36, 209.98it/s]

finished frames 5418000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 903139/1666666 [1:32:25<59:33, 213.65it/s]  

finished frames 5418600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 903227/1666666 [1:32:26<59:16, 214.64it/s]

finished frames 5419200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 903337/1666666 [1:32:26<59:26, 214.04it/s]

finished frames 5419800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 903425/1666666 [1:32:27<59:59, 212.02it/s]  

finished frames 5420400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 903533/1666666 [1:32:27<1:03:38, 199.83it/s]

finished frames 5421000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 903617/1666666 [1:32:28<1:07:23, 188.71it/s]

finished frames 5421600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 903722/1666666 [1:32:28<1:02:16, 204.16it/s]

finished frames 5422200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 903827/1666666 [1:32:29<1:01:22, 207.17it/s]

finished frames 5422800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 903932/1666666 [1:32:29<1:01:21, 207.18it/s]

finished frames 5423400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 904037/1666666 [1:32:30<1:02:40, 202.78it/s]

finished frames 5424000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 904122/1666666 [1:32:30<1:01:31, 206.56it/s]

finished frames 5424600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 904227/1666666 [1:32:30<1:01:14, 207.51it/s]

finished frames 5425200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 904332/1666666 [1:32:31<1:01:09, 207.76it/s]

finished frames 5425800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 904437/1666666 [1:32:31<1:01:08, 207.80it/s]

finished frames 5426400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 904523/1666666 [1:32:32<1:00:18, 210.61it/s]

finished frames 5427000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 904633/1666666 [1:32:32<59:33, 213.26it/s]  

finished frames 5427600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 904743/1666666 [1:32:33<59:35, 213.09it/s]

finished frames 5428200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 904831/1666666 [1:32:33<59:37, 212.98it/s]

finished frames 5428800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 904941/1666666 [1:32:34<59:30, 213.37it/s]

finished frames 5429400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 905029/1666666 [1:32:34<1:00:46, 208.88it/s]

finished frames 5430000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 905139/1666666 [1:32:35<59:37, 212.87it/s]  

finished frames 5430600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 905227/1666666 [1:32:35<59:26, 213.47it/s]

finished frames 5431200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 905337/1666666 [1:32:36<59:05, 214.72it/s]

finished frames 5431800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 905425/1666666 [1:32:36<59:10, 214.43it/s]

finished frames 5432400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 905535/1666666 [1:32:37<59:21, 213.71it/s]

finished frames 5433000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 905623/1666666 [1:32:37<59:09, 214.38it/s]

finished frames 5433600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 905733/1666666 [1:32:38<59:06, 214.57it/s]

finished frames 5434200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 905843/1666666 [1:32:38<59:02, 214.78it/s]

finished frames 5434800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 905931/1666666 [1:32:39<1:00:54, 208.17it/s]

finished frames 5435400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 906039/1666666 [1:32:39<1:03:22, 200.06it/s]

finished frames 5436000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 906127/1666666 [1:32:39<1:00:16, 210.28it/s]

finished frames 5436600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 906237/1666666 [1:32:40<59:23, 213.42it/s]  

finished frames 5437200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 906325/1666666 [1:32:40<59:22, 213.44it/s]

finished frames 5437800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 906435/1666666 [1:32:41<59:19, 213.61it/s]

finished frames 5438400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 906523/1666666 [1:32:41<59:19, 213.53it/s]

finished frames 5439000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 906633/1666666 [1:32:42<59:13, 213.89it/s]

finished frames 5439600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 906743/1666666 [1:32:42<59:16, 213.65it/s]

finished frames 5440200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 906831/1666666 [1:32:43<59:08, 214.13it/s]

finished frames 5440800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 906941/1666666 [1:32:43<59:02, 214.49it/s]

finished frames 5441400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 907029/1666666 [1:32:44<1:00:27, 209.39it/s]

finished frames 5442000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 907139/1666666 [1:32:44<59:31, 212.69it/s]  

finished frames 5442600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 907227/1666666 [1:32:45<59:21, 213.21it/s]

finished frames 5443200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 907337/1666666 [1:32:45<59:17, 213.42it/s]

finished frames 5443800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 907425/1666666 [1:32:46<59:21, 213.16it/s]

finished frames 5444400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 907535/1666666 [1:32:46<59:31, 212.55it/s]

finished frames 5445000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 907623/1666666 [1:32:47<59:36, 212.20it/s]

finished frames 5445600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 907733/1666666 [1:32:47<59:34, 212.31it/s]

finished frames 5446200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 907842/1666666 [1:32:48<1:00:27, 209.22it/s]

finished frames 5446800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 907930/1666666 [1:32:48<59:05, 214.00it/s]  

finished frames 5447400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 908039/1666666 [1:32:49<1:01:07, 206.86it/s]

finished frames 5448000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 908123/1666666 [1:32:49<1:00:51, 207.73it/s]

finished frames 5448600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 54%|█████▍    | 908229/1666666 [1:32:49<1:10:56, 178.18it/s]

finished frames 5449200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 908336/1666666 [1:32:50<1:02:08, 203.40it/s]

finished frames 5449800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 908442/1666666 [1:32:51<1:00:55, 207.43it/s]

finished frames 5450400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 908527/1666666 [1:32:51<1:00:28, 208.92it/s]

finished frames 5451000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 908635/1666666 [1:32:51<1:00:12, 209.86it/s]

finished frames 5451600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 908742/1666666 [1:32:52<1:00:13, 209.77it/s]

finished frames 5452200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 908827/1666666 [1:32:52<1:00:19, 209.40it/s]

finished frames 5452800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 908934/1666666 [1:32:53<1:00:08, 209.99it/s]

finished frames 5453400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 909020/1666666 [1:32:53<1:02:07, 203.28it/s]

finished frames 5454000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 909125/1666666 [1:32:54<1:01:00, 206.92it/s]

finished frames 5454600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 909230/1666666 [1:32:54<1:00:39, 208.09it/s]

finished frames 5455200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 909335/1666666 [1:32:55<1:00:52, 207.37it/s]

finished frames 5455800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 909440/1666666 [1:32:55<1:00:49, 207.48it/s]

finished frames 5456400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 909524/1666666 [1:32:56<1:00:44, 207.76it/s]

finished frames 5457000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 909629/1666666 [1:32:56<1:00:49, 207.44it/s]

finished frames 5457600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 909734/1666666 [1:32:57<1:00:40, 207.90it/s]

finished frames 5458200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 909840/1666666 [1:32:57<1:00:30, 208.48it/s]

finished frames 5458800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 909925/1666666 [1:32:58<1:00:30, 208.44it/s]

finished frames 5459400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 910030/1666666 [1:32:58<1:02:01, 203.30it/s]

finished frames 5460000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 910135/1666666 [1:32:59<1:00:47, 207.40it/s]

finished frames 5460600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 910240/1666666 [1:32:59<1:00:28, 208.49it/s]

finished frames 5461200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 910324/1666666 [1:33:00<1:00:43, 207.61it/s]

finished frames 5461800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 910429/1666666 [1:33:00<1:00:38, 207.83it/s]

finished frames 5462400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 910534/1666666 [1:33:01<1:00:38, 207.82it/s]

finished frames 5463000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 910639/1666666 [1:33:01<1:02:08, 202.76it/s]

finished frames 5463600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 910723/1666666 [1:33:02<1:01:41, 204.22it/s]

finished frames 5464200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 910828/1666666 [1:33:02<1:00:42, 207.52it/s]

finished frames 5464800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 910935/1666666 [1:33:03<1:00:07, 209.48it/s]

finished frames 5465400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 911022/1666666 [1:33:03<1:00:58, 206.55it/s]

finished frames 5466000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 911132/1666666 [1:33:04<58:57, 213.61it/s]  

finished frames 5466600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 911242/1666666 [1:33:04<58:41, 214.52it/s]

finished frames 5467200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 911330/1666666 [1:33:04<58:31, 215.08it/s]

finished frames 5467800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 911440/1666666 [1:33:05<58:28, 215.24it/s]

finished frames 5468400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 911528/1666666 [1:33:05<58:25, 215.44it/s]

finished frames 5469000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 911638/1666666 [1:33:06<58:25, 215.41it/s]

finished frames 5469600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 911726/1666666 [1:33:06<58:39, 214.49it/s]

finished frames 5470200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 911836/1666666 [1:33:07<58:43, 214.25it/s]

finished frames 5470800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 911924/1666666 [1:33:07<58:36, 214.61it/s]

finished frames 5471400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 912034/1666666 [1:33:08<1:00:00, 209.58it/s]

finished frames 5472000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 912144/1666666 [1:33:08<58:48, 213.83it/s]  

finished frames 5472600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 912232/1666666 [1:33:09<58:47, 213.87it/s]

finished frames 5473200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 912342/1666666 [1:33:09<58:39, 214.33it/s]

finished frames 5473800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 912430/1666666 [1:33:10<58:47, 213.80it/s]

finished frames 5474400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 912540/1666666 [1:33:10<58:53, 213.41it/s]

finished frames 5475000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 912628/1666666 [1:33:11<58:57, 213.16it/s]

finished frames 5475600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 912738/1666666 [1:33:11<58:51, 213.48it/s]

finished frames 5476200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 912826/1666666 [1:33:11<58:58, 213.02it/s]

finished frames 5476800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 912936/1666666 [1:33:12<58:51, 213.44it/s]

finished frames 5477400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 913024/1666666 [1:33:12<59:59, 209.39it/s]  

finished frames 5478000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 913134/1666666 [1:33:13<1:01:50, 203.09it/s]

finished frames 5478600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 913243/1666666 [1:33:13<59:47, 209.99it/s]  

finished frames 5479200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 913331/1666666 [1:33:14<59:18, 211.68it/s]

finished frames 5479800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 913441/1666666 [1:33:14<59:03, 212.58it/s]

finished frames 5480400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 913529/1666666 [1:33:15<59:06, 212.34it/s]

finished frames 5481000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 913639/1666666 [1:33:15<59:03, 212.52it/s]

finished frames 5481600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 913727/1666666 [1:33:16<59:08, 212.20it/s]

finished frames 5482200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 913837/1666666 [1:33:16<59:01, 212.58it/s]

finished frames 5482800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 913925/1666666 [1:33:17<59:03, 212.43it/s]

finished frames 5483400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 914035/1666666 [1:33:17<1:00:14, 208.25it/s]

finished frames 5484000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 914123/1666666 [1:33:18<59:19, 211.43it/s]  

finished frames 5484600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 914233/1666666 [1:33:18<58:56, 212.75it/s]

finished frames 5485200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 914343/1666666 [1:33:19<58:49, 213.18it/s]

finished frames 5485800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 914431/1666666 [1:33:19<58:54, 212.82it/s]

finished frames 5486400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 914541/1666666 [1:33:20<58:57, 212.62it/s]

finished frames 5487000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 914629/1666666 [1:33:20<58:52, 212.87it/s]

finished frames 5487600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 914739/1666666 [1:33:20<58:50, 212.95it/s]

finished frames 5488200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 914827/1666666 [1:33:21<58:54, 212.72it/s]

finished frames 5488800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 914937/1666666 [1:33:21<58:56, 212.54it/s]

finished frames 5489400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 915024/1666666 [1:33:22<1:00:24, 207.39it/s]

finished frames 5490000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 915134/1666666 [1:33:22<59:03, 212.06it/s]  

finished frames 5490600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 915244/1666666 [1:33:23<58:45, 213.16it/s]

finished frames 5491200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 915332/1666666 [1:33:23<1:03:44, 196.45it/s]

finished frames 5491800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 915441/1666666 [1:33:24<59:45, 209.49it/s]  

finished frames 5492400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 915529/1666666 [1:33:24<59:10, 211.58it/s]

finished frames 5493000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 915639/1666666 [1:33:25<58:46, 212.96it/s]

finished frames 5493600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 915727/1666666 [1:33:25<59:02, 211.96it/s]

finished frames 5494200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 915837/1666666 [1:33:26<58:46, 212.89it/s]

finished frames 5494800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 915925/1666666 [1:33:26<58:42, 213.14it/s]

finished frames 5495400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 916035/1666666 [1:33:27<1:00:13, 207.72it/s]

finished frames 5496000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 916144/1666666 [1:33:27<59:00, 211.97it/s]  

finished frames 5496600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 916232/1666666 [1:33:28<58:40, 213.14it/s]

finished frames 5497200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 916342/1666666 [1:33:28<58:39, 213.20it/s]

finished frames 5497800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 916430/1666666 [1:33:29<58:47, 212.67it/s]

finished frames 5498400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 916540/1666666 [1:33:29<58:35, 213.38it/s]

finished frames 5499000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▍    | 916628/1666666 [1:33:29<58:34, 213.40it/s]

finished frames 5499600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 916738/1666666 [1:33:30<58:34, 213.37it/s]

finished frames 5500200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 916826/1666666 [1:33:30<58:32, 213.46it/s]

finished frames 5500800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 916936/1666666 [1:33:31<58:30, 213.55it/s]

finished frames 5501400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 917024/1666666 [1:33:31<59:48, 208.91it/s]  

finished frames 5502000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 917134/1666666 [1:33:32<58:48, 212.42it/s]

finished frames 5502600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 917222/1666666 [1:33:32<58:29, 213.53it/s]

finished frames 5503200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 917332/1666666 [1:33:33<59:30, 209.89it/s]

finished frames 5503800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 917440/1666666 [1:33:33<59:36, 209.47it/s]  

finished frames 5504400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 917525/1666666 [1:33:34<59:47, 208.80it/s]  

finished frames 5505000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 917633/1666666 [1:33:34<59:18, 210.52it/s]  

finished frames 5505600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 917721/1666666 [1:33:35<1:03:55, 195.28it/s]

finished frames 5506200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 917830/1666666 [1:33:35<59:18, 210.45it/s]  

finished frames 5506800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 917940/1666666 [1:33:36<58:20, 213.91it/s]

finished frames 5507400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 918028/1666666 [1:33:36<59:40, 209.08it/s]  

finished frames 5508000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 918135/1666666 [1:33:37<59:30, 209.63it/s]  

finished frames 5508600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 918243/1666666 [1:33:37<59:18, 210.33it/s]

finished frames 5509200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 918330/1666666 [1:33:38<59:17, 210.35it/s]

finished frames 5509800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 918439/1666666 [1:33:38<59:16, 210.36it/s]

finished frames 5510400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 918526/1666666 [1:33:38<59:15, 210.39it/s]

finished frames 5511000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 918636/1666666 [1:33:39<59:14, 210.47it/s]

finished frames 5511600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 918723/1666666 [1:33:39<59:05, 210.96it/s]

finished frames 5512200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 918833/1666666 [1:33:40<58:50, 211.83it/s]

finished frames 5512800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 918943/1666666 [1:33:40<58:53, 211.63it/s]

finished frames 5513400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 919031/1666666 [1:33:41<1:00:18, 206.60it/s]

finished frames 5514000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 919139/1666666 [1:33:41<59:05, 210.86it/s]  

finished frames 5514600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 919226/1666666 [1:33:42<59:01, 211.02it/s]

finished frames 5515200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 919336/1666666 [1:33:42<58:53, 211.50it/s]

finished frames 5515800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 919424/1666666 [1:33:43<58:57, 211.23it/s]

finished frames 5516400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 919534/1666666 [1:33:43<59:16, 210.08it/s]

finished frames 5517000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 919622/1666666 [1:33:44<59:17, 209.97it/s]

finished frames 5517600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 919728/1666666 [1:33:44<59:52, 207.90it/s]  

finished frames 5518200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 919833/1666666 [1:33:45<59:50, 207.99it/s]  

finished frames 5518800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 919938/1666666 [1:33:45<59:40, 208.56it/s]  

finished frames 5519400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 920022/1666666 [1:33:46<1:01:11, 203.37it/s]

finished frames 5520000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 920127/1666666 [1:33:46<1:02:51, 197.96it/s]

finished frames 5520600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 920232/1666666 [1:33:47<1:03:00, 197.42it/s]

finished frames 5521200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 920337/1666666 [1:33:47<1:00:30, 205.60it/s]

finished frames 5521800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 920442/1666666 [1:33:48<59:58, 207.37it/s]  

finished frames 5522400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 920527/1666666 [1:33:48<59:48, 207.95it/s]  

finished frames 5523000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 920632/1666666 [1:33:49<59:46, 208.02it/s]  

finished frames 5523600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 920737/1666666 [1:33:49<59:47, 207.93it/s]  

finished frames 5524200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 920842/1666666 [1:33:50<1:00:03, 206.97it/s]

finished frames 5524800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 920926/1666666 [1:33:50<59:56, 207.37it/s]  

finished frames 5525400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 921031/1666666 [1:33:51<1:01:10, 203.12it/s]

finished frames 5526000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 921136/1666666 [1:33:51<1:00:07, 206.66it/s]

finished frames 5526600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 921222/1666666 [1:33:52<59:35, 208.49it/s]  

finished frames 5527200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 921329/1666666 [1:33:52<59:24, 209.07it/s]

finished frames 5527800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 921435/1666666 [1:33:53<59:26, 208.96it/s]

finished frames 5528400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 921542/1666666 [1:33:53<59:13, 209.67it/s]

finished frames 5529000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 921627/1666666 [1:33:53<1:00:14, 206.12it/s]

finished frames 5529600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 921737/1666666 [1:33:54<59:38, 208.15it/s]  

finished frames 5530200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 921825/1666666 [1:33:54<58:56, 210.64it/s]

finished frames 5530800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 921931/1666666 [1:33:55<1:00:38, 204.70it/s]

finished frames 5531400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 922036/1666666 [1:33:55<1:00:54, 203.76it/s]

finished frames 5532000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 922124/1666666 [1:33:56<59:15, 209.41it/s]  

finished frames 5532600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 922234/1666666 [1:33:56<57:34, 215.51it/s]

finished frames 5533200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 922344/1666666 [1:33:57<57:28, 215.82it/s]

finished frames 5533800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 922433/1666666 [1:33:57<1:01:14, 202.52it/s]

finished frames 5534400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 922541/1666666 [1:33:58<58:43, 211.18it/s]  

finished frames 5535000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 922629/1666666 [1:33:58<57:43, 214.80it/s]

finished frames 5535600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 922739/1666666 [1:33:59<58:28, 212.06it/s]

finished frames 5536200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 922827/1666666 [1:33:59<58:18, 212.61it/s]

finished frames 5536800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 922937/1666666 [1:34:00<57:57, 213.84it/s]

finished frames 5537400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 923024/1666666 [1:34:00<1:01:20, 202.06it/s]

finished frames 5538000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 923133/1666666 [1:34:01<58:43, 211.05it/s]  

finished frames 5538600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 923243/1666666 [1:34:01<58:06, 213.22it/s]

finished frames 5539200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 923331/1666666 [1:34:02<58:16, 212.62it/s]

finished frames 5539800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 923441/1666666 [1:34:02<58:13, 212.75it/s]

finished frames 5540400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 923528/1666666 [1:34:02<58:37, 211.29it/s]

finished frames 5541000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 923638/1666666 [1:34:03<58:29, 211.70it/s]

finished frames 5541600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 923723/1666666 [1:34:03<59:22, 208.52it/s]

finished frames 5542200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 923828/1666666 [1:34:04<59:31, 208.01it/s]

finished frames 5542800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 923933/1666666 [1:34:04<59:43, 207.25it/s]  

finished frames 5543400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 924038/1666666 [1:34:05<1:01:23, 201.63it/s]

finished frames 5544000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 924143/1666666 [1:34:05<59:49, 206.86it/s]  

finished frames 5544600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 924227/1666666 [1:34:06<59:54, 206.53it/s]  

finished frames 5545200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 924332/1666666 [1:34:06<59:54, 206.54it/s]  

finished frames 5545800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 924437/1666666 [1:34:07<59:21, 208.38it/s]  

finished frames 5546400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 924542/1666666 [1:34:07<59:18, 208.56it/s]

finished frames 5547000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 924628/1666666 [1:34:08<59:02, 209.47it/s]

finished frames 5547600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 924734/1666666 [1:34:08<59:17, 208.56it/s]

finished frames 5548200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 924841/1666666 [1:34:09<1:00:46, 203.45it/s]

finished frames 5548800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 55%|█████▌    | 924925/1666666 [1:34:09<1:02:16, 198.49it/s]

finished frames 5549400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 925030/1666666 [1:34:10<1:01:06, 202.28it/s]

finished frames 5550000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 925137/1666666 [1:34:10<59:21, 208.23it/s]  

finished frames 5550600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 925222/1666666 [1:34:11<59:15, 208.55it/s]

finished frames 5551200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 925330/1666666 [1:34:11<58:53, 209.81it/s]

finished frames 5551800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 925438/1666666 [1:34:12<58:53, 209.79it/s]

finished frames 5552400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 925522/1666666 [1:34:12<59:14, 208.49it/s]

finished frames 5553000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 925629/1666666 [1:34:13<58:55, 209.58it/s]

finished frames 5553600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 925737/1666666 [1:34:13<58:40, 210.46it/s]

finished frames 5554200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 925825/1666666 [1:34:14<58:25, 211.34it/s]

finished frames 5554800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 925935/1666666 [1:34:14<58:16, 211.87it/s]

finished frames 5555400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 926022/1666666 [1:34:15<59:44, 206.61it/s]  

finished frames 5556000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 926132/1666666 [1:34:15<58:17, 211.73it/s]

finished frames 5556600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 926242/1666666 [1:34:16<57:59, 212.77it/s]

finished frames 5557200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 926330/1666666 [1:34:16<58:06, 212.31it/s]

finished frames 5557800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 926440/1666666 [1:34:16<57:51, 213.23it/s]

finished frames 5558400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 926528/1666666 [1:34:17<57:50, 213.26it/s]

finished frames 5559000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 926638/1666666 [1:34:17<58:11, 211.97it/s]

finished frames 5559600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 926725/1666666 [1:34:18<58:22, 211.27it/s]

finished frames 5560200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 926835/1666666 [1:34:18<58:03, 212.41it/s]

finished frames 5560800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 926923/1666666 [1:34:19<58:01, 212.49it/s]

finished frames 5561400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 927033/1666666 [1:34:19<59:20, 207.72it/s]

finished frames 5562000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 927139/1666666 [1:34:20<1:00:38, 203.25it/s]

finished frames 5562600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 927225/1666666 [1:34:20<59:16, 207.89it/s]  

finished frames 5563200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 927333/1666666 [1:34:21<59:49, 205.97it/s]  

finished frames 5563800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 927441/1666666 [1:34:21<58:34, 210.32it/s]

finished frames 5564400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 927528/1666666 [1:34:22<58:29, 210.63it/s]

finished frames 5565000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 927637/1666666 [1:34:22<58:36, 210.18it/s]

finished frames 5565600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 927724/1666666 [1:34:23<58:40, 209.87it/s]

finished frames 5566200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 927831/1666666 [1:34:23<58:45, 209.54it/s]

finished frames 5566800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 927939/1666666 [1:34:24<58:32, 210.30it/s]

finished frames 5567400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 928024/1666666 [1:34:24<1:00:10, 204.58it/s]

finished frames 5568000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 928131/1666666 [1:34:25<58:50, 209.21it/s]  

finished frames 5568600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 928238/1666666 [1:34:25<58:50, 209.15it/s]

finished frames 5569200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 928323/1666666 [1:34:26<58:50, 209.12it/s]

finished frames 5569800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 928430/1666666 [1:34:26<58:39, 209.73it/s]

finished frames 5570400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 928537/1666666 [1:34:27<58:40, 209.64it/s]

finished frames 5571000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 928622/1666666 [1:34:27<58:50, 209.07it/s]

finished frames 5571600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 928729/1666666 [1:34:27<58:43, 209.41it/s]

finished frames 5572200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 928835/1666666 [1:34:28<58:45, 209.31it/s]

finished frames 5572800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 928943/1666666 [1:34:28<58:21, 210.71it/s]

finished frames 5573400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 929030/1666666 [1:34:29<59:46, 205.66it/s]  

finished frames 5574000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 929138/1666666 [1:34:29<58:23, 210.48it/s]

finished frames 5574600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 929225/1666666 [1:34:30<58:31, 210.03it/s]

finished frames 5575200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 929333/1666666 [1:34:30<58:32, 209.90it/s]

finished frames 5575800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 929441/1666666 [1:34:31<58:23, 210.45it/s]

finished frames 5576400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 929529/1666666 [1:34:31<1:00:53, 201.75it/s]

finished frames 5577000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 929636/1666666 [1:34:32<1:01:25, 200.00it/s]

finished frames 5577600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 929722/1666666 [1:34:32<59:09, 207.64it/s]  

finished frames 5578200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 929830/1666666 [1:34:33<58:23, 210.34it/s]

finished frames 5578800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 929938/1666666 [1:34:33<58:53, 208.48it/s]

finished frames 5579400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 930022/1666666 [1:34:34<59:55, 204.87it/s]  

finished frames 5580000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 930127/1666666 [1:34:34<58:57, 208.20it/s]

finished frames 5580600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 930235/1666666 [1:34:35<58:29, 209.86it/s]

finished frames 5581200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 930343/1666666 [1:34:35<58:12, 210.81it/s]

finished frames 5581800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 930431/1666666 [1:34:36<58:11, 210.84it/s]

finished frames 5582400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 930541/1666666 [1:34:36<58:18, 210.43it/s]

finished frames 5583000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 930628/1666666 [1:34:37<58:12, 210.72it/s]

finished frames 5583600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 930738/1666666 [1:34:37<58:11, 210.75it/s]

finished frames 5584200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 930826/1666666 [1:34:38<58:16, 210.43it/s]

finished frames 5584800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 930935/1666666 [1:34:38<58:14, 210.51it/s]

finished frames 5585400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 931022/1666666 [1:34:38<59:39, 205.54it/s]

finished frames 5586000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 931128/1666666 [1:34:39<58:49, 208.38it/s]

finished frames 5586600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 931233/1666666 [1:34:39<58:50, 208.29it/s]

finished frames 5587200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 931341/1666666 [1:34:40<58:31, 209.42it/s]

finished frames 5587800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 931426/1666666 [1:34:40<58:33, 209.25it/s]

finished frames 5588400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 931534/1666666 [1:34:41<57:56, 211.46it/s]

finished frames 5589000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 931644/1666666 [1:34:41<57:02, 214.74it/s]

finished frames 5589600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 931732/1666666 [1:34:42<56:58, 214.99it/s]

finished frames 5590200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 931820/1666666 [1:34:42<1:00:36, 202.08it/s]

finished frames 5590800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 931930/1666666 [1:34:43<57:52, 211.57it/s]  

finished frames 5591400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 932039/1666666 [1:34:43<1:00:25, 202.61it/s]

finished frames 5592000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 932127/1666666 [1:34:44<57:51, 211.62it/s]  

finished frames 5592600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 932237/1666666 [1:34:44<58:06, 210.67it/s]

finished frames 5593200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 932324/1666666 [1:34:45<58:00, 211.00it/s]

finished frames 5593800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 932433/1666666 [1:34:45<58:07, 210.51it/s]

finished frames 5594400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 932543/1666666 [1:34:46<57:53, 211.38it/s]

finished frames 5595000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 932631/1666666 [1:34:46<57:53, 211.30it/s]

finished frames 5595600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 932740/1666666 [1:34:47<58:53, 207.69it/s]

finished frames 5596200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 932825/1666666 [1:34:47<58:51, 207.82it/s]

finished frames 5596800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 932935/1666666 [1:34:48<57:48, 211.52it/s]

finished frames 5597400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 933023/1666666 [1:34:48<58:48, 207.92it/s]

finished frames 5598000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 933131/1666666 [1:34:49<57:44, 211.75it/s]

finished frames 5598600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 933241/1666666 [1:34:49<58:15, 209.85it/s]

finished frames 5599200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 933325/1666666 [1:34:50<58:56, 207.37it/s]

finished frames 5599800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 933431/1666666 [1:34:50<58:43, 208.13it/s]

finished frames 5600400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 933536/1666666 [1:34:51<58:48, 207.80it/s]

finished frames 5601000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 933641/1666666 [1:34:51<58:49, 207.66it/s]

finished frames 5601600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 933725/1666666 [1:34:51<59:03, 206.83it/s]

finished frames 5602200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 933830/1666666 [1:34:52<58:49, 207.60it/s]

finished frames 5602800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 933936/1666666 [1:34:52<58:37, 208.33it/s]

finished frames 5603400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 934021/1666666 [1:34:53<1:00:36, 201.47it/s]

finished frames 5604000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 934128/1666666 [1:34:53<58:52, 207.40it/s]  

finished frames 5604600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 934234/1666666 [1:34:54<1:00:15, 202.60it/s]

finished frames 5605200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 934339/1666666 [1:34:54<1:00:45, 200.91it/s]

finished frames 5605800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 934423/1666666 [1:34:55<59:24, 205.43it/s]  

finished frames 5606400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 934529/1666666 [1:34:55<58:41, 207.93it/s]

finished frames 5607000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 934635/1666666 [1:34:56<58:30, 208.55it/s]

finished frames 5607600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 934742/1666666 [1:34:56<58:21, 209.02it/s]

finished frames 5608200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 934828/1666666 [1:34:57<58:18, 209.20it/s]

finished frames 5608800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 934935/1666666 [1:34:57<58:15, 209.34it/s]

finished frames 5609400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 935020/1666666 [1:34:58<1:00:20, 202.07it/s]

finished frames 5610000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 935128/1666666 [1:34:58<58:17, 209.17it/s]  

finished frames 5610600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 935235/1666666 [1:34:59<58:05, 209.87it/s]

finished frames 5611200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 935323/1666666 [1:34:59<57:06, 213.42it/s]

finished frames 5611800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 935433/1666666 [1:35:00<56:50, 214.40it/s]

finished frames 5612400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 935543/1666666 [1:35:00<56:51, 214.32it/s]

finished frames 5613000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 935631/1666666 [1:35:01<56:55, 214.06it/s]

finished frames 5613600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 935741/1666666 [1:35:01<56:38, 215.07it/s]

finished frames 5614200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 935829/1666666 [1:35:02<56:41, 214.83it/s]

finished frames 5614800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 935939/1666666 [1:35:02<56:51, 214.17it/s]

finished frames 5615400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 936027/1666666 [1:35:02<58:01, 209.86it/s]

finished frames 5616000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 936137/1666666 [1:35:03<56:58, 213.72it/s]

finished frames 5616600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 936225/1666666 [1:35:03<56:54, 213.94it/s]

finished frames 5617200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 936335/1666666 [1:35:04<56:40, 214.74it/s]

finished frames 5617800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 936423/1666666 [1:35:04<56:45, 214.42it/s]

finished frames 5618400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 936533/1666666 [1:35:05<1:01:38, 197.43it/s]

finished frames 5619000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 936643/1666666 [1:35:05<57:25, 211.89it/s]  

finished frames 5619600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 936731/1666666 [1:35:06<57:03, 213.24it/s]

finished frames 5620200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 936841/1666666 [1:35:06<56:48, 214.10it/s]

finished frames 5620800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 936929/1666666 [1:35:07<56:40, 214.61it/s]

finished frames 5621400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 937039/1666666 [1:35:07<58:06, 209.30it/s]

finished frames 5622000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 937127/1666666 [1:35:08<57:04, 213.05it/s]

finished frames 5622600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 937237/1666666 [1:35:08<56:46, 214.14it/s]

finished frames 5623200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 937325/1666666 [1:35:09<56:44, 214.26it/s]

finished frames 5623800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▌    | 937435/1666666 [1:35:09<56:32, 214.96it/s]

finished frames 5624400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▋    | 937523/1666666 [1:35:10<56:41, 214.33it/s]

finished frames 5625000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▋    | 937633/1666666 [1:35:10<56:25, 215.31it/s]

finished frames 5625600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▋    | 937743/1666666 [1:35:11<56:33, 214.81it/s]

finished frames 5626200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▋    | 937831/1666666 [1:35:11<56:35, 214.64it/s]

finished frames 5626800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▋    | 937941/1666666 [1:35:11<56:32, 214.83it/s]

finished frames 5627400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▋    | 938029/1666666 [1:35:12<57:50, 209.94it/s]

finished frames 5628000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▋    | 938139/1666666 [1:35:12<56:53, 213.39it/s]

finished frames 5628600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▋    | 938227/1666666 [1:35:13<56:52, 213.46it/s]

finished frames 5629200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▋    | 938337/1666666 [1:35:13<56:34, 214.53it/s]

finished frames 5629800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▋    | 938425/1666666 [1:35:14<56:26, 215.05it/s]

finished frames 5630400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▋    | 938535/1666666 [1:35:14<56:31, 214.69it/s]

finished frames 5631000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▋    | 938623/1666666 [1:35:15<56:37, 214.27it/s]

finished frames 5631600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▋    | 938733/1666666 [1:35:15<56:31, 214.60it/s]

finished frames 5632200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▋    | 938843/1666666 [1:35:16<56:33, 214.50it/s]

finished frames 5632800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▋    | 938931/1666666 [1:35:16<56:22, 215.15it/s]

finished frames 5633400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▋    | 939041/1666666 [1:35:17<57:45, 209.99it/s]

finished frames 5634000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▋    | 939129/1666666 [1:35:17<58:30, 207.27it/s]  

finished frames 5634600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▋    | 939239/1666666 [1:35:18<56:57, 212.85it/s]

finished frames 5635200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▋    | 939327/1666666 [1:35:18<56:36, 214.16it/s]

finished frames 5635800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▋    | 939437/1666666 [1:35:19<56:30, 214.47it/s]

finished frames 5636400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▋    | 939525/1666666 [1:35:19<56:30, 214.49it/s]

finished frames 5637000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▋    | 939635/1666666 [1:35:19<56:30, 214.41it/s]

finished frames 5637600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▋    | 939723/1666666 [1:35:20<56:30, 214.41it/s]

finished frames 5638200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▋    | 939833/1666666 [1:35:20<56:29, 214.44it/s]

finished frames 5638800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▋    | 939943/1666666 [1:35:21<56:26, 214.60it/s]

finished frames 5639400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▋    | 940031/1666666 [1:35:21<57:35, 210.29it/s]

finished frames 5640000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▋    | 940141/1666666 [1:35:22<56:46, 213.28it/s]

finished frames 5640600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▋    | 940229/1666666 [1:35:22<56:29, 214.34it/s]

finished frames 5641200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▋    | 940339/1666666 [1:35:23<56:33, 214.03it/s]

finished frames 5641800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▋    | 940427/1666666 [1:35:23<56:30, 214.20it/s]

finished frames 5642400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▋    | 940537/1666666 [1:35:24<56:31, 214.11it/s]

finished frames 5643000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▋    | 940625/1666666 [1:35:24<56:31, 214.11it/s]

finished frames 5643600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▋    | 940735/1666666 [1:35:25<56:26, 214.34it/s]

finished frames 5644200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▋    | 940823/1666666 [1:35:25<56:32, 213.96it/s]

finished frames 5644800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▋    | 940933/1666666 [1:35:26<56:21, 214.61it/s]

finished frames 5645400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▋    | 941021/1666666 [1:35:26<58:14, 207.67it/s]

finished frames 5646000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▋    | 941130/1666666 [1:35:26<57:04, 211.87it/s]

finished frames 5646600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▋    | 941240/1666666 [1:35:27<56:31, 213.89it/s]

finished frames 5647200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▋    | 941328/1666666 [1:35:27<56:19, 214.63it/s]

finished frames 5647800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▋    | 941438/1666666 [1:35:28<56:15, 214.83it/s]

finished frames 5648400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▋    | 941526/1666666 [1:35:28<59:33, 202.94it/s]  

finished frames 5649000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 56%|█████▋    | 941636/1666666 [1:35:29<56:44, 212.97it/s]

finished frames 5649600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 941724/1666666 [1:35:29<56:26, 214.10it/s]

finished frames 5650200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 941834/1666666 [1:35:30<56:06, 215.33it/s]

finished frames 5650800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 941944/1666666 [1:35:30<56:09, 215.05it/s]

finished frames 5651400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 942031/1666666 [1:35:31<58:15, 207.32it/s]

finished frames 5652000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 942138/1666666 [1:35:31<57:48, 208.87it/s]

finished frames 5652600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 942222/1666666 [1:35:32<58:07, 207.70it/s]

finished frames 5653200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 942328/1666666 [1:35:32<58:04, 207.89it/s]

finished frames 5653800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 942433/1666666 [1:35:33<57:54, 208.43it/s]

finished frames 5654400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 942538/1666666 [1:35:33<58:00, 208.02it/s]

finished frames 5655000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 942622/1666666 [1:35:34<58:09, 207.51it/s]

finished frames 5655600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 942728/1666666 [1:35:34<57:53, 208.41it/s]

finished frames 5656200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 942835/1666666 [1:35:35<57:34, 209.53it/s]

finished frames 5656800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 942941/1666666 [1:35:35<57:38, 209.25it/s]

finished frames 5657400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 943025/1666666 [1:35:36<59:05, 204.13it/s]

finished frames 5658000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 943132/1666666 [1:35:36<57:47, 208.64it/s]

finished frames 5658600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 943239/1666666 [1:35:37<57:34, 209.39it/s]

finished frames 5659200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 943323/1666666 [1:35:37<57:51, 208.37it/s]

finished frames 5659800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 943429/1666666 [1:35:37<57:44, 208.74it/s]

finished frames 5660400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 943535/1666666 [1:35:38<57:42, 208.86it/s]

finished frames 5661000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 943642/1666666 [1:35:38<57:30, 209.57it/s]

finished frames 5661600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 943726/1666666 [1:35:39<1:03:44, 189.02it/s]

finished frames 5662200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 943832/1666666 [1:35:40<1:02:18, 193.36it/s]

finished frames 5662800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 943937/1666666 [1:35:40<58:42, 205.16it/s]  

finished frames 5663400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 944021/1666666 [1:35:40<59:59, 200.76it/s]

finished frames 5664000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 944127/1666666 [1:35:41<58:10, 206.98it/s]

finished frames 5664600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 944232/1666666 [1:35:41<57:54, 207.93it/s]

finished frames 5665200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 944337/1666666 [1:35:42<58:41, 205.15it/s]

finished frames 5665800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 944442/1666666 [1:35:42<59:12, 203.32it/s]

finished frames 5666400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 944526/1666666 [1:35:43<59:23, 202.67it/s]

finished frames 5667000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 944631/1666666 [1:35:43<58:59, 203.97it/s]

finished frames 5667600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 944736/1666666 [1:35:44<59:51, 201.02it/s]  

finished frames 5668200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 944841/1666666 [1:35:44<59:31, 202.12it/s]  

finished frames 5668800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 944925/1666666 [1:35:45<58:42, 204.90it/s]

finished frames 5669400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 945030/1666666 [1:35:45<58:44, 204.76it/s]

finished frames 5670000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 945138/1666666 [1:35:46<57:09, 210.38it/s]

finished frames 5670600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 945226/1666666 [1:35:46<57:09, 210.36it/s]

finished frames 5671200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 945336/1666666 [1:35:47<57:00, 210.91it/s]

finished frames 5671800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 945424/1666666 [1:35:47<56:53, 211.30it/s]

finished frames 5672400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 945534/1666666 [1:35:48<56:22, 213.17it/s]

finished frames 5673000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 945644/1666666 [1:35:48<56:07, 214.12it/s]

finished frames 5673600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 945732/1666666 [1:35:49<56:10, 213.87it/s]

finished frames 5674200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 945842/1666666 [1:35:49<56:13, 213.65it/s]

finished frames 5674800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 945930/1666666 [1:35:50<56:22, 213.08it/s]

finished frames 5675400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 946039/1666666 [1:35:50<58:30, 205.30it/s]

finished frames 5676000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 946124/1666666 [1:35:51<1:02:05, 193.40it/s]

finished frames 5676600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 946229/1666666 [1:35:51<58:33, 205.06it/s]  

finished frames 5677200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 946334/1666666 [1:35:52<57:58, 207.05it/s]

finished frames 5677800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 946439/1666666 [1:35:52<57:50, 207.52it/s]

finished frames 5678400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 946523/1666666 [1:35:53<57:58, 207.03it/s]

finished frames 5679000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 946628/1666666 [1:35:53<58:06, 206.50it/s]

finished frames 5679600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 946737/1666666 [1:35:54<56:16, 213.21it/s]

finished frames 5680200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 946826/1666666 [1:35:54<56:12, 213.44it/s]

finished frames 5680800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 946938/1666666 [1:35:54<55:52, 214.69it/s]

finished frames 5681400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 947025/1666666 [1:35:55<57:59, 206.81it/s]

finished frames 5682000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 947133/1666666 [1:35:55<56:37, 211.80it/s]

finished frames 5682600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 947243/1666666 [1:35:56<56:01, 214.02it/s]

finished frames 5683200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 947331/1666666 [1:35:56<55:57, 214.24it/s]

finished frames 5683800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 947441/1666666 [1:35:57<56:28, 212.23it/s]

finished frames 5684400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 947529/1666666 [1:35:57<56:41, 211.39it/s]

finished frames 5685000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 947636/1666666 [1:35:58<57:42, 207.64it/s]

finished frames 5685600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 947722/1666666 [1:35:58<57:01, 210.14it/s]

finished frames 5686200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 947832/1666666 [1:35:59<56:28, 212.15it/s]

finished frames 5686800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 947938/1666666 [1:35:59<57:30, 208.30it/s]

finished frames 5687400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 948022/1666666 [1:36:00<59:13, 202.22it/s]

finished frames 5688000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 948130/1666666 [1:36:00<57:30, 208.23it/s]

finished frames 5688600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 948238/1666666 [1:36:01<56:41, 211.20it/s]

finished frames 5689200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 948326/1666666 [1:36:01<56:15, 212.84it/s]

finished frames 5689800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 948436/1666666 [1:36:02<56:16, 212.73it/s]

finished frames 5690400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 948524/1666666 [1:36:02<58:42, 203.87it/s]  

finished frames 5691000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 948634/1666666 [1:36:03<56:26, 212.02it/s]

finished frames 5691600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 948744/1666666 [1:36:03<56:11, 212.91it/s]

finished frames 5692200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 948832/1666666 [1:36:04<56:01, 213.57it/s]

finished frames 5692800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 948942/1666666 [1:36:04<56:00, 213.60it/s]

finished frames 5693400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 949030/1666666 [1:36:04<57:16, 208.84it/s]

finished frames 5694000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 949140/1666666 [1:36:05<56:10, 212.91it/s]

finished frames 5694600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 949228/1666666 [1:36:05<56:01, 213.43it/s]

finished frames 5695200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 949338/1666666 [1:36:06<56:00, 213.49it/s]

finished frames 5695800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 949426/1666666 [1:36:06<55:59, 213.49it/s]

finished frames 5696400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 949536/1666666 [1:36:07<56:15, 212.45it/s]

finished frames 5697000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 949624/1666666 [1:36:07<56:09, 212.82it/s]

finished frames 5697600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 949734/1666666 [1:36:08<56:10, 212.69it/s]

finished frames 5698200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 949844/1666666 [1:36:08<56:02, 213.18it/s]

finished frames 5698800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 949932/1666666 [1:36:09<56:08, 212.80it/s]

finished frames 5699400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 950020/1666666 [1:36:09<58:27, 204.32it/s]

finished frames 5700000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 950125/1666666 [1:36:10<57:41, 206.98it/s]

finished frames 5700600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 950231/1666666 [1:36:10<57:15, 208.56it/s]

finished frames 5701200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 950336/1666666 [1:36:11<57:33, 207.41it/s]

finished frames 5701800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 950441/1666666 [1:36:11<57:40, 206.95it/s]

finished frames 5702400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 950525/1666666 [1:36:12<57:45, 206.65it/s]

finished frames 5703000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 950630/1666666 [1:36:12<57:33, 207.36it/s]

finished frames 5703600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 950714/1666666 [1:36:12<58:14, 204.85it/s]

finished frames 5704200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 950819/1666666 [1:36:13<59:37, 200.07it/s]  

finished frames 5704800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 950924/1666666 [1:36:14<59:16, 201.22it/s]  

finished frames 5705400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 951029/1666666 [1:36:14<58:58, 202.24it/s]

finished frames 5706000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 951134/1666666 [1:36:15<58:06, 205.23it/s]

finished frames 5706600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 951239/1666666 [1:36:15<57:28, 207.45it/s]

finished frames 5707200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 951323/1666666 [1:36:16<57:32, 207.18it/s]

finished frames 5707800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 951428/1666666 [1:36:16<57:21, 207.85it/s]

finished frames 5708400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 951533/1666666 [1:36:17<57:27, 207.41it/s]

finished frames 5709000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 951638/1666666 [1:36:17<57:18, 207.93it/s]

finished frames 5709600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 951743/1666666 [1:36:18<57:08, 208.54it/s]

finished frames 5710200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 951827/1666666 [1:36:18<57:17, 207.94it/s]

finished frames 5710800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 951932/1666666 [1:36:18<57:17, 207.93it/s]

finished frames 5711400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 952037/1666666 [1:36:19<58:30, 203.59it/s]

finished frames 5712000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 952142/1666666 [1:36:19<57:24, 207.42it/s]

finished frames 5712600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 952226/1666666 [1:36:20<57:21, 207.60it/s]

finished frames 5713200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 952331/1666666 [1:36:20<57:18, 207.74it/s]

finished frames 5713800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 952437/1666666 [1:36:21<57:12, 208.05it/s]

finished frames 5714400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 952542/1666666 [1:36:21<57:11, 208.11it/s]

finished frames 5715000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 952626/1666666 [1:36:22<57:16, 207.80it/s]

finished frames 5715600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 952731/1666666 [1:36:22<57:11, 208.08it/s]

finished frames 5716200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 952836/1666666 [1:36:23<57:08, 208.22it/s]

finished frames 5716800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 952941/1666666 [1:36:23<57:17, 207.65it/s]

finished frames 5717400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 953025/1666666 [1:36:24<58:32, 203.19it/s]

finished frames 5718000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 953130/1666666 [1:36:24<58:44, 202.48it/s]  

finished frames 5718600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 953235/1666666 [1:36:25<57:21, 207.33it/s]

finished frames 5719200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 953341/1666666 [1:36:25<56:51, 209.10it/s]

finished frames 5719800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 953426/1666666 [1:36:26<57:00, 208.54it/s]

finished frames 5720400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 953532/1666666 [1:36:26<56:59, 208.52it/s]

finished frames 5721000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 953638/1666666 [1:36:27<56:50, 209.06it/s]

finished frames 5721600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 953723/1666666 [1:36:27<56:51, 208.99it/s]

finished frames 5722200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 953829/1666666 [1:36:28<56:56, 208.65it/s]

finished frames 5722800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 953936/1666666 [1:36:28<56:43, 209.42it/s]

finished frames 5723400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 954041/1666666 [1:36:29<58:09, 204.24it/s]

finished frames 5724000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 954126/1666666 [1:36:29<57:11, 207.67it/s]

finished frames 5724600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 954233/1666666 [1:36:30<56:55, 208.61it/s]

finished frames 5725200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 954339/1666666 [1:36:30<56:47, 209.02it/s]

finished frames 5725800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 954423/1666666 [1:36:30<56:55, 208.51it/s]

finished frames 5726400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 954529/1666666 [1:36:31<56:55, 208.49it/s]

finished frames 5727000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 954635/1666666 [1:36:32<56:48, 208.90it/s]

finished frames 5727600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 954740/1666666 [1:36:32<56:52, 208.64it/s]

finished frames 5728200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 954824/1666666 [1:36:32<56:58, 208.26it/s]

finished frames 5728800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 954930/1666666 [1:36:33<56:54, 208.42it/s]

finished frames 5729400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 955035/1666666 [1:36:33<58:04, 204.21it/s]

finished frames 5730000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 955142/1666666 [1:36:34<56:52, 208.52it/s]

finished frames 5730600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 955226/1666666 [1:36:34<56:53, 208.39it/s]

finished frames 5731200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 955331/1666666 [1:36:35<57:00, 207.97it/s]

finished frames 5731800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 955436/1666666 [1:36:35<58:34, 202.38it/s]  

finished frames 5732400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 955542/1666666 [1:36:36<59:27, 199.32it/s]  

finished frames 5733000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 955626/1666666 [1:36:36<57:33, 205.88it/s]

finished frames 5733600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 955731/1666666 [1:36:37<57:01, 207.79it/s]

finished frames 5734200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 955837/1666666 [1:36:37<56:41, 208.95it/s]

finished frames 5734800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 955922/1666666 [1:36:38<56:50, 208.43it/s]

finished frames 5735400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 956027/1666666 [1:36:38<57:47, 204.92it/s]

finished frames 5736000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 956132/1666666 [1:36:39<57:02, 207.62it/s]

finished frames 5736600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 956237/1666666 [1:36:39<56:49, 208.36it/s]

finished frames 5737200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 956342/1666666 [1:36:40<56:54, 208.03it/s]

finished frames 5737800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 956426/1666666 [1:36:40<56:52, 208.13it/s]

finished frames 5738400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 956532/1666666 [1:36:41<56:47, 208.43it/s]

finished frames 5739000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 956637/1666666 [1:36:41<56:41, 208.75it/s]

finished frames 5739600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 956742/1666666 [1:36:42<56:56, 207.77it/s]

finished frames 5740200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 956827/1666666 [1:36:42<56:52, 208.04it/s]

finished frames 5740800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 956934/1666666 [1:36:43<56:26, 209.57it/s]

finished frames 5741400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 957022/1666666 [1:36:43<57:36, 205.28it/s]

finished frames 5742000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 957131/1666666 [1:36:44<55:51, 211.71it/s]

finished frames 5742600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 957241/1666666 [1:36:44<55:28, 213.16it/s]

finished frames 5743200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 957329/1666666 [1:36:45<55:17, 213.84it/s]

finished frames 5743800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 957439/1666666 [1:36:45<55:05, 214.54it/s]

finished frames 5744400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 957527/1666666 [1:36:45<55:08, 214.35it/s]

finished frames 5745000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 957637/1666666 [1:36:46<55:09, 214.22it/s]

finished frames 5745600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 957725/1666666 [1:36:46<59:54, 197.25it/s]

finished frames 5746200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 957834/1666666 [1:36:47<56:10, 210.29it/s]

finished frames 5746800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 957944/1666666 [1:36:47<55:26, 213.03it/s]

finished frames 5747400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 958032/1666666 [1:36:48<56:40, 208.39it/s]

finished frames 5748000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 958140/1666666 [1:36:48<56:20, 209.58it/s]

finished frames 5748600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 57%|█████▋    | 958228/1666666 [1:36:49<55:21, 213.30it/s]

finished frames 5749200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 958338/1666666 [1:36:49<55:02, 214.51it/s]

finished frames 5749800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 958426/1666666 [1:36:50<54:56, 214.86it/s]

finished frames 5750400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 958536/1666666 [1:36:50<54:54, 214.94it/s]

finished frames 5751000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 958624/1666666 [1:36:51<54:40, 215.81it/s]

finished frames 5751600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 958734/1666666 [1:36:51<56:25, 209.11it/s]

finished frames 5752200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 958841/1666666 [1:36:52<56:23, 209.18it/s]

finished frames 5752800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 958927/1666666 [1:36:52<56:02, 210.48it/s]

finished frames 5753400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 959036/1666666 [1:36:53<57:09, 206.36it/s]

finished frames 5754000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 959123/1666666 [1:36:53<55:59, 210.63it/s]

finished frames 5754600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 959233/1666666 [1:36:54<55:47, 211.34it/s]

finished frames 5755200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 959343/1666666 [1:36:54<55:26, 212.61it/s]

finished frames 5755800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 959431/1666666 [1:36:54<55:30, 212.36it/s]

finished frames 5756400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 959541/1666666 [1:36:55<55:36, 211.93it/s]

finished frames 5757000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 959629/1666666 [1:36:55<55:21, 212.85it/s]

finished frames 5757600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 959739/1666666 [1:36:56<55:22, 212.76it/s]

finished frames 5758200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 959827/1666666 [1:36:56<55:07, 213.70it/s]

finished frames 5758800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 959937/1666666 [1:36:57<54:52, 214.63it/s]

finished frames 5759400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 960025/1666666 [1:36:57<56:24, 208.77it/s]

finished frames 5760000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 960135/1666666 [1:36:58<55:28, 212.28it/s]

finished frames 5760600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 960223/1666666 [1:36:58<55:20, 212.77it/s]

finished frames 5761200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 960333/1666666 [1:36:59<55:11, 213.28it/s]

finished frames 5761800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 960443/1666666 [1:36:59<55:15, 212.98it/s]

finished frames 5762400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 960531/1666666 [1:37:00<55:25, 212.33it/s]

finished frames 5763000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 960641/1666666 [1:37:00<55:07, 213.46it/s]

finished frames 5763600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 960729/1666666 [1:37:01<55:15, 212.90it/s]

finished frames 5764200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 960839/1666666 [1:37:01<55:12, 213.05it/s]

finished frames 5764800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 960927/1666666 [1:37:02<55:12, 213.06it/s]

finished frames 5765400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 961036/1666666 [1:37:02<56:40, 207.48it/s]

finished frames 5766000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 961123/1666666 [1:37:02<55:33, 211.65it/s]

finished frames 5766600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 961233/1666666 [1:37:03<55:13, 212.87it/s]

finished frames 5767200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 961343/1666666 [1:37:03<55:09, 213.09it/s]

finished frames 5767800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 961431/1666666 [1:37:04<55:19, 212.44it/s]

finished frames 5768400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 961541/1666666 [1:37:04<55:02, 213.52it/s]

finished frames 5769000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 961629/1666666 [1:37:05<55:10, 212.94it/s]

finished frames 5769600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 961739/1666666 [1:37:05<55:04, 213.33it/s]

finished frames 5770200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 961827/1666666 [1:37:06<55:11, 212.85it/s]

finished frames 5770800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 961937/1666666 [1:37:06<55:20, 212.22it/s]

finished frames 5771400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 962024/1666666 [1:37:07<56:35, 207.50it/s]

finished frames 5772000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 962133/1666666 [1:37:07<55:21, 212.11it/s]

finished frames 5772600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 962243/1666666 [1:37:08<55:05, 213.12it/s]

finished frames 5773200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 962331/1666666 [1:37:08<55:07, 212.96it/s]

finished frames 5773800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 962441/1666666 [1:37:09<55:04, 213.14it/s]

finished frames 5774400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 962529/1666666 [1:37:09<55:18, 212.22it/s]

finished frames 5775000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 962617/1666666 [1:37:10<59:58, 195.63it/s]

finished frames 5775600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 962727/1666666 [1:37:10<55:38, 210.88it/s]

finished frames 5776200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 962837/1666666 [1:37:11<54:50, 213.90it/s]

finished frames 5776800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 962925/1666666 [1:37:11<54:49, 213.92it/s]

finished frames 5777400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 963035/1666666 [1:37:12<55:48, 210.16it/s]

finished frames 5778000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 963123/1666666 [1:37:12<55:10, 212.49it/s]

finished frames 5778600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 963233/1666666 [1:37:12<54:49, 213.85it/s]

finished frames 5779200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 963343/1666666 [1:37:13<54:38, 214.51it/s]

finished frames 5779800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 963431/1666666 [1:37:13<54:35, 214.67it/s]

finished frames 5780400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 963541/1666666 [1:37:14<54:28, 215.09it/s]

finished frames 5781000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 963629/1666666 [1:37:14<54:29, 215.04it/s]

finished frames 5781600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 963739/1666666 [1:37:15<54:32, 214.80it/s]

finished frames 5782200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 963827/1666666 [1:37:15<54:38, 214.38it/s]

finished frames 5782800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 963937/1666666 [1:37:16<55:13, 212.06it/s]

finished frames 5783400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 964022/1666666 [1:37:16<57:10, 204.79it/s]

finished frames 5784000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 964129/1666666 [1:37:17<56:08, 208.56it/s]

finished frames 5784600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 964236/1666666 [1:37:17<55:50, 209.66it/s]

finished frames 5785200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 964342/1666666 [1:37:18<55:59, 209.05it/s]

finished frames 5785800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 964428/1666666 [1:37:18<55:57, 209.19it/s]

finished frames 5786400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 964536/1666666 [1:37:19<55:50, 209.57it/s]

finished frames 5787000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 964643/1666666 [1:37:19<55:54, 209.28it/s]

finished frames 5787600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 964729/1666666 [1:37:20<56:02, 208.76it/s]

finished frames 5788200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 964837/1666666 [1:37:20<55:26, 210.98it/s]

finished frames 5788800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 964925/1666666 [1:37:20<57:04, 204.90it/s]

finished frames 5789400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 965033/1666666 [1:37:21<1:00:48, 192.31it/s]

finished frames 5790000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 965143/1666666 [1:37:22<55:27, 210.81it/s]  

finished frames 5790600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 965231/1666666 [1:37:22<54:40, 213.83it/s]

finished frames 5791200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 965341/1666666 [1:37:23<54:23, 214.91it/s]

finished frames 5791800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 965429/1666666 [1:37:23<54:23, 214.87it/s]

finished frames 5792400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 965539/1666666 [1:37:23<54:08, 215.82it/s]

finished frames 5793000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 965627/1666666 [1:37:24<54:18, 215.11it/s]

finished frames 5793600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 965737/1666666 [1:37:24<53:57, 216.52it/s]

finished frames 5794200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 965825/1666666 [1:37:25<54:16, 215.22it/s]

finished frames 5794800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 965935/1666666 [1:37:25<54:45, 213.29it/s]

finished frames 5795400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 966023/1666666 [1:37:26<55:53, 208.96it/s]

finished frames 5796000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 966133/1666666 [1:37:26<54:59, 212.34it/s]

finished frames 5796600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 966243/1666666 [1:37:27<54:42, 213.38it/s]

finished frames 5797200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 966331/1666666 [1:37:27<54:36, 213.72it/s]

finished frames 5797800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 966441/1666666 [1:37:28<54:27, 214.30it/s]

finished frames 5798400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 966529/1666666 [1:37:28<54:26, 214.36it/s]

finished frames 5799000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 966639/1666666 [1:37:29<54:17, 214.93it/s]

finished frames 5799600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 966727/1666666 [1:37:29<54:33, 213.83it/s]

finished frames 5800200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 966837/1666666 [1:37:30<54:12, 215.16it/s]

finished frames 5800800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 966925/1666666 [1:37:30<55:06, 211.63it/s]

finished frames 5801400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 967035/1666666 [1:37:30<56:15, 207.24it/s]

finished frames 5802000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 967142/1666666 [1:37:31<55:36, 209.66it/s]

finished frames 5802600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 967227/1666666 [1:37:31<55:47, 208.96it/s]

finished frames 5803200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 967334/1666666 [1:37:32<55:39, 209.43it/s]

finished frames 5803800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 967441/1666666 [1:37:32<58:57, 197.69it/s]  

finished frames 5804400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 967526/1666666 [1:37:33<56:28, 206.32it/s]

finished frames 5805000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 967632/1666666 [1:37:33<55:46, 208.88it/s]

finished frames 5805600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 967739/1666666 [1:37:34<55:40, 209.22it/s]

finished frames 5806200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 967825/1666666 [1:37:34<55:30, 209.86it/s]

finished frames 5806800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 967932/1666666 [1:37:35<55:33, 209.62it/s]

finished frames 5807400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 968038/1666666 [1:37:35<57:49, 201.38it/s]

finished frames 5808000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 968123/1666666 [1:37:36<56:05, 207.55it/s]

finished frames 5808600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 968230/1666666 [1:37:36<55:39, 209.17it/s]

finished frames 5809200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 968335/1666666 [1:37:37<56:03, 207.60it/s]

finished frames 5809800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 968440/1666666 [1:37:37<56:08, 207.26it/s]

finished frames 5810400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 968524/1666666 [1:37:38<56:16, 206.80it/s]

finished frames 5811000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 968629/1666666 [1:37:38<56:14, 206.84it/s]

finished frames 5811600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 968734/1666666 [1:37:39<56:04, 207.47it/s]

finished frames 5812200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 968839/1666666 [1:37:39<56:01, 207.58it/s]

finished frames 5812800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 968924/1666666 [1:37:40<55:51, 208.17it/s]

finished frames 5813400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 969034/1666666 [1:37:40<55:50, 208.19it/s]

finished frames 5814000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 969144/1666666 [1:37:41<54:25, 213.62it/s]

finished frames 5814600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 969232/1666666 [1:37:41<54:18, 214.00it/s]

finished frames 5815200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 969342/1666666 [1:37:42<54:14, 214.29it/s]

finished frames 5815800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 969430/1666666 [1:37:42<54:24, 213.59it/s]

finished frames 5816400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 969540/1666666 [1:37:42<54:12, 214.32it/s]

finished frames 5817000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 969628/1666666 [1:37:43<57:45, 201.15it/s]

finished frames 5817600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 969737/1666666 [1:37:43<54:51, 211.72it/s]

finished frames 5818200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 969825/1666666 [1:37:44<54:31, 213.01it/s]

finished frames 5818800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 969935/1666666 [1:37:44<54:24, 213.41it/s]

finished frames 5819400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 970023/1666666 [1:37:45<55:27, 209.37it/s]

finished frames 5820000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 970133/1666666 [1:37:45<54:24, 213.39it/s]

finished frames 5820600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 970243/1666666 [1:37:46<54:09, 214.33it/s]

finished frames 5821200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 970331/1666666 [1:37:46<53:55, 215.23it/s]

finished frames 5821800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 970441/1666666 [1:37:47<55:12, 210.20it/s]

finished frames 5822400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 970529/1666666 [1:37:47<54:10, 214.19it/s]

finished frames 5823000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 970639/1666666 [1:37:48<54:01, 214.71it/s]

finished frames 5823600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 970727/1666666 [1:37:48<54:00, 214.73it/s]

finished frames 5824200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 970837/1666666 [1:37:49<53:57, 214.90it/s]

finished frames 5824800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 970925/1666666 [1:37:49<53:52, 215.21it/s]

finished frames 5825400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 971035/1666666 [1:37:50<55:16, 209.78it/s]

finished frames 5826000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 971123/1666666 [1:37:50<54:14, 213.70it/s]

finished frames 5826600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 971233/1666666 [1:37:50<53:58, 214.74it/s]

finished frames 5827200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 971343/1666666 [1:37:51<53:57, 214.74it/s]

finished frames 5827800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 971431/1666666 [1:37:51<54:02, 214.44it/s]

finished frames 5828400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 971541/1666666 [1:37:52<54:02, 214.41it/s]

finished frames 5829000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 971629/1666666 [1:37:52<54:03, 214.28it/s]

finished frames 5829600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 971739/1666666 [1:37:53<54:02, 214.34it/s]

finished frames 5830200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 971827/1666666 [1:37:53<54:55, 210.81it/s]

finished frames 5830800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 971940/1666666 [1:37:54<52:48, 219.24it/s]

finished frames 5831400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 972029/1666666 [1:37:54<54:14, 213.43it/s]

finished frames 5832000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 972117/1666666 [1:37:55<54:12, 213.53it/s]

finished frames 5832600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 972226/1666666 [1:37:55<55:45, 207.60it/s]

finished frames 5833200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 972336/1666666 [1:37:56<53:28, 216.40it/s]

finished frames 5833800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 972424/1666666 [1:37:56<53:09, 217.67it/s]

finished frames 5834400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 972534/1666666 [1:37:57<52:59, 218.33it/s]

finished frames 5835000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 972645/1666666 [1:37:57<53:20, 216.87it/s]

finished frames 5835600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 972733/1666666 [1:37:57<54:20, 212.86it/s]

finished frames 5836200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 972843/1666666 [1:37:58<54:49, 210.93it/s]

finished frames 5836800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 972932/1666666 [1:37:58<54:37, 211.68it/s]

finished frames 5837400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 973020/1666666 [1:37:59<56:09, 205.89it/s]

finished frames 5838000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 973129/1666666 [1:37:59<54:37, 211.60it/s]

finished frames 5838600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 973239/1666666 [1:38:00<54:28, 212.13it/s]

finished frames 5839200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 973327/1666666 [1:38:00<55:14, 209.19it/s]

finished frames 5839800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 973435/1666666 [1:38:01<54:43, 211.10it/s]

finished frames 5840400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 973523/1666666 [1:38:01<54:16, 212.86it/s]

finished frames 5841000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 973633/1666666 [1:38:02<54:15, 212.86it/s]

finished frames 5841600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 973743/1666666 [1:38:02<54:15, 212.81it/s]

finished frames 5842200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 973831/1666666 [1:38:03<54:35, 211.52it/s]

finished frames 5842800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 973941/1666666 [1:38:03<54:43, 210.99it/s]

finished frames 5843400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 974026/1666666 [1:38:04<56:46, 203.33it/s]

finished frames 5844000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 974131/1666666 [1:38:04<55:40, 207.31it/s]

finished frames 5844600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 974236/1666666 [1:38:05<55:33, 207.71it/s]

finished frames 5845200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 974341/1666666 [1:38:05<55:33, 207.67it/s]

finished frames 5845800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 974425/1666666 [1:38:06<59:09, 195.05it/s]  

finished frames 5846400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 974528/1666666 [1:38:06<57:32, 200.48it/s]

finished frames 5847000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 974633/1666666 [1:38:07<57:03, 202.11it/s]

finished frames 5847600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 974738/1666666 [1:38:07<56:57, 202.47it/s]

finished frames 5848200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 974822/1666666 [1:38:08<57:08, 201.79it/s]

finished frames 5848800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 58%|█████▊    | 974927/1666666 [1:38:08<57:06, 201.87it/s]

finished frames 5849400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▊    | 975032/1666666 [1:38:09<58:13, 197.99it/s]

finished frames 5850000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▊    | 975135/1666666 [1:38:09<57:09, 201.63it/s]

finished frames 5850600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▊    | 975240/1666666 [1:38:10<56:50, 202.71it/s]

finished frames 5851200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▊    | 975324/1666666 [1:38:10<57:03, 201.92it/s]

finished frames 5851800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▊    | 975429/1666666 [1:38:11<57:04, 201.87it/s]

finished frames 5852400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▊    | 975534/1666666 [1:38:11<56:55, 202.35it/s]

finished frames 5853000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▊    | 975639/1666666 [1:38:12<56:53, 202.44it/s]

finished frames 5853600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▊    | 975723/1666666 [1:38:12<57:00, 202.00it/s]

finished frames 5854200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▊    | 975828/1666666 [1:38:13<56:51, 202.50it/s]

finished frames 5854800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▊    | 975933/1666666 [1:38:13<56:58, 202.06it/s]

finished frames 5855400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▊    | 976038/1666666 [1:38:14<58:10, 197.84it/s]

finished frames 5856000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▊    | 976141/1666666 [1:38:14<57:04, 201.66it/s]

finished frames 5856600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▊    | 976225/1666666 [1:38:15<57:04, 201.60it/s]

finished frames 5857200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▊    | 976330/1666666 [1:38:15<56:53, 202.25it/s]

finished frames 5857800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▊    | 976435/1666666 [1:38:16<56:45, 202.70it/s]

finished frames 5858400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▊    | 976540/1666666 [1:38:16<56:46, 202.58it/s]

finished frames 5859000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▊    | 976624/1666666 [1:38:17<56:47, 202.51it/s]

finished frames 5859600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▊    | 976728/1666666 [1:38:17<57:57, 198.39it/s]  

finished frames 5860200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▊    | 976831/1666666 [1:38:18<58:44, 195.72it/s]  

finished frames 5860800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▊    | 976934/1666666 [1:38:18<57:04, 201.44it/s]

finished frames 5861400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▊    | 977039/1666666 [1:38:19<58:13, 197.41it/s]

finished frames 5862000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▊    | 977142/1666666 [1:38:19<56:58, 201.70it/s]

finished frames 5862600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▊    | 977226/1666666 [1:38:20<56:47, 202.32it/s]

finished frames 5863200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▊    | 977331/1666666 [1:38:20<56:44, 202.48it/s]

finished frames 5863800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▊    | 977436/1666666 [1:38:21<56:39, 202.77it/s]

finished frames 5864400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▊    | 977541/1666666 [1:38:21<56:42, 202.56it/s]

finished frames 5865000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▊    | 977625/1666666 [1:38:22<56:03, 204.84it/s]

finished frames 5865600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▊    | 977733/1666666 [1:38:22<54:18, 211.40it/s]

finished frames 5866200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▊    | 977843/1666666 [1:38:23<53:48, 213.38it/s]

finished frames 5866800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▊    | 977931/1666666 [1:38:23<53:43, 213.66it/s]

finished frames 5867400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▊    | 978041/1666666 [1:38:24<54:57, 208.86it/s]

finished frames 5868000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▊    | 978129/1666666 [1:38:24<54:04, 212.24it/s]

finished frames 5868600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▊    | 978239/1666666 [1:38:24<53:50, 213.08it/s]

finished frames 5869200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▊    | 978327/1666666 [1:38:25<53:42, 213.58it/s]

finished frames 5869800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▊    | 978437/1666666 [1:38:25<53:38, 213.84it/s]

finished frames 5870400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▊    | 978525/1666666 [1:38:26<53:37, 213.85it/s]

finished frames 5871000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▊    | 978635/1666666 [1:38:26<53:35, 213.98it/s]

finished frames 5871600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▊    | 978723/1666666 [1:38:27<53:34, 214.04it/s]

finished frames 5872200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▊    | 978833/1666666 [1:38:27<53:32, 214.12it/s]

finished frames 5872800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▊    | 978943/1666666 [1:38:28<53:31, 214.11it/s]

finished frames 5873400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▊    | 979029/1666666 [1:38:28<57:12, 200.31it/s]

finished frames 5874000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▊    | 979138/1666666 [1:38:29<56:29, 202.83it/s]

finished frames 5874600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 979226/1666666 [1:38:29<54:18, 210.97it/s]

finished frames 5875200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 979336/1666666 [1:38:30<53:43, 213.22it/s]

finished frames 5875800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 979424/1666666 [1:38:30<53:22, 214.59it/s]

finished frames 5876400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 979534/1666666 [1:38:31<53:18, 214.82it/s]

finished frames 5877000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 979644/1666666 [1:38:31<53:19, 214.74it/s]

finished frames 5877600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 979732/1666666 [1:38:32<53:21, 214.56it/s]

finished frames 5878200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 979842/1666666 [1:38:32<53:15, 214.94it/s]

finished frames 5878800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 979930/1666666 [1:38:32<53:16, 214.84it/s]

finished frames 5879400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 980040/1666666 [1:38:33<54:18, 210.70it/s]

finished frames 5880000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 980128/1666666 [1:38:33<53:31, 213.80it/s]

finished frames 5880600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 980238/1666666 [1:38:34<53:21, 214.39it/s]

finished frames 5881200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 980326/1666666 [1:38:34<53:17, 214.64it/s]

finished frames 5881800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 980436/1666666 [1:38:35<53:20, 214.43it/s]

finished frames 5882400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 980524/1666666 [1:38:35<53:17, 214.59it/s]

finished frames 5883000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 980634/1666666 [1:38:36<53:21, 214.26it/s]

finished frames 5883600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 980744/1666666 [1:38:36<53:12, 214.86it/s]

finished frames 5884200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 980832/1666666 [1:38:37<53:15, 214.66it/s]

finished frames 5884800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 980942/1666666 [1:38:37<53:09, 214.96it/s]

finished frames 5885400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 981030/1666666 [1:38:38<54:19, 210.34it/s]

finished frames 5886000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 981140/1666666 [1:38:38<53:18, 214.35it/s]

finished frames 5886600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 981228/1666666 [1:38:39<53:25, 213.85it/s]

finished frames 5887200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 981338/1666666 [1:38:39<53:13, 214.62it/s]

finished frames 5887800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 981426/1666666 [1:38:40<57:38, 198.10it/s]  

finished frames 5888400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 981535/1666666 [1:38:40<56:20, 202.70it/s]

finished frames 5889000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 981623/1666666 [1:38:40<53:57, 211.59it/s]

finished frames 5889600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 981733/1666666 [1:38:41<53:13, 214.46it/s]

finished frames 5890200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 981843/1666666 [1:38:41<53:05, 214.97it/s]

finished frames 5890800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 981931/1666666 [1:38:42<53:14, 214.38it/s]

finished frames 5891400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 982040/1666666 [1:38:42<54:54, 207.83it/s]

finished frames 5892000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 982126/1666666 [1:38:43<54:24, 209.71it/s]

finished frames 5892600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 982233/1666666 [1:38:43<54:12, 210.43it/s]

finished frames 5893200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 982340/1666666 [1:38:44<54:36, 208.85it/s]

finished frames 5893800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 982425/1666666 [1:38:44<54:25, 209.55it/s]

finished frames 5894400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 982531/1666666 [1:38:45<54:32, 209.08it/s]

finished frames 5895000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 982636/1666666 [1:38:45<54:30, 209.12it/s]

finished frames 5895600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 982743/1666666 [1:38:46<54:27, 209.34it/s]

finished frames 5896200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 982829/1666666 [1:38:46<54:21, 209.67it/s]

finished frames 5896800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 982935/1666666 [1:38:47<54:33, 208.86it/s]

finished frames 5897400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 983019/1666666 [1:38:47<56:29, 201.68it/s]

finished frames 5898000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 983127/1666666 [1:38:48<54:42, 208.24it/s]

finished frames 5898600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 983234/1666666 [1:38:48<54:19, 209.67it/s]

finished frames 5899200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 983342/1666666 [1:38:49<54:12, 210.11it/s]

finished frames 5899800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 983429/1666666 [1:38:49<54:18, 209.66it/s]

finished frames 5900400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 983536/1666666 [1:38:50<54:17, 209.70it/s]

finished frames 5901000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 983643/1666666 [1:38:50<54:17, 209.67it/s]

finished frames 5901600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 983728/1666666 [1:38:51<58:57, 193.04it/s]

finished frames 5902200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 983836/1666666 [1:38:51<54:45, 207.81it/s]

finished frames 5902800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 983922/1666666 [1:38:51<54:22, 209.28it/s]

finished frames 5903400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 984029/1666666 [1:38:52<55:39, 204.41it/s]

finished frames 5904000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 984134/1666666 [1:38:52<54:59, 206.86it/s]

finished frames 5904600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 984239/1666666 [1:38:53<54:48, 207.51it/s]

finished frames 5905200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 984324/1666666 [1:38:53<54:30, 208.65it/s]

finished frames 5905800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 984429/1666666 [1:38:54<54:28, 208.72it/s]

finished frames 5906400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 984535/1666666 [1:38:54<54:43, 207.74it/s]

finished frames 5907000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 984640/1666666 [1:38:55<54:51, 207.19it/s]

finished frames 5907600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 984724/1666666 [1:38:55<54:45, 207.59it/s]

finished frames 5908200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 984829/1666666 [1:38:56<54:54, 206.96it/s]

finished frames 5908800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 984934/1666666 [1:38:56<54:58, 206.68it/s]

finished frames 5909400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 985039/1666666 [1:38:57<56:03, 202.67it/s]

finished frames 5910000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 985123/1666666 [1:38:57<55:07, 206.04it/s]

finished frames 5910600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 985228/1666666 [1:38:58<54:55, 206.77it/s]

finished frames 5911200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 985334/1666666 [1:38:58<54:39, 207.78it/s]

finished frames 5911800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 985439/1666666 [1:38:59<54:53, 206.81it/s]

finished frames 5912400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 985523/1666666 [1:38:59<55:03, 206.20it/s]

finished frames 5913000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 985629/1666666 [1:39:00<54:20, 208.87it/s]

finished frames 5913600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 985739/1666666 [1:39:00<53:27, 212.30it/s]

finished frames 5914200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 985827/1666666 [1:39:01<53:23, 212.54it/s]

finished frames 5914800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 985937/1666666 [1:39:01<53:28, 212.19it/s]

finished frames 5915400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 986025/1666666 [1:39:02<54:30, 208.10it/s]

finished frames 5916000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 986134/1666666 [1:39:02<55:11, 205.48it/s]

finished frames 5916600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 986243/1666666 [1:39:03<55:25, 204.64it/s]

finished frames 5917200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 986331/1666666 [1:39:03<53:46, 210.86it/s]

finished frames 5917800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 986441/1666666 [1:39:04<53:14, 212.96it/s]

finished frames 5918400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 986529/1666666 [1:39:04<53:25, 212.19it/s]

finished frames 5919000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 986639/1666666 [1:39:05<53:08, 213.29it/s]

finished frames 5919600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 986727/1666666 [1:39:05<53:19, 212.50it/s]

finished frames 5920200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 986837/1666666 [1:39:05<53:25, 212.11it/s]

finished frames 5920800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 986925/1666666 [1:39:06<53:18, 212.53it/s]

finished frames 5921400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 987035/1666666 [1:39:06<54:22, 208.30it/s]

finished frames 5922000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 987122/1666666 [1:39:07<53:28, 211.78it/s]

finished frames 5922600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 987232/1666666 [1:39:07<53:17, 212.52it/s]

finished frames 5923200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 987342/1666666 [1:39:08<53:15, 212.57it/s]

finished frames 5923800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 987430/1666666 [1:39:08<53:35, 211.21it/s]

finished frames 5924400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 987540/1666666 [1:39:09<53:34, 211.27it/s]

finished frames 5925000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 987628/1666666 [1:39:09<53:34, 211.23it/s]

finished frames 5925600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 987738/1666666 [1:39:10<53:48, 210.29it/s]

finished frames 5926200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 987825/1666666 [1:39:10<53:47, 210.31it/s]

finished frames 5926800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 987932/1666666 [1:39:11<54:21, 208.08it/s]

finished frames 5927400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 988038/1666666 [1:39:11<55:35, 203.48it/s]

finished frames 5928000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 988122/1666666 [1:39:12<54:53, 206.00it/s]

finished frames 5928600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 988227/1666666 [1:39:12<54:12, 208.61it/s]

finished frames 5929200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 988332/1666666 [1:39:13<54:26, 207.63it/s]

finished frames 5929800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 988416/1666666 [1:39:13<59:29, 190.04it/s]

finished frames 5930400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 988521/1666666 [1:39:14<55:44, 202.78it/s]

finished frames 5931000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 988630/1666666 [1:39:14<54:03, 209.04it/s]

finished frames 5931600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 988738/1666666 [1:39:15<53:44, 210.23it/s]

finished frames 5932200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 988826/1666666 [1:39:15<53:40, 210.49it/s]

finished frames 5932800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 988935/1666666 [1:39:16<53:46, 210.03it/s]

finished frames 5933400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 989021/1666666 [1:39:16<55:20, 204.09it/s]

finished frames 5934000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 989129/1666666 [1:39:16<54:02, 208.95it/s]

finished frames 5934600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 989236/1666666 [1:39:17<53:50, 209.71it/s]

finished frames 5935200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 989322/1666666 [1:39:17<53:48, 209.81it/s]

finished frames 5935800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 989430/1666666 [1:39:18<53:49, 209.67it/s]

finished frames 5936400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 989537/1666666 [1:39:18<53:50, 209.62it/s]

finished frames 5937000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 989623/1666666 [1:39:19<53:50, 209.60it/s]

finished frames 5937600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 989729/1666666 [1:39:19<53:47, 209.74it/s]

finished frames 5938200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 989837/1666666 [1:39:20<53:41, 210.09it/s]

finished frames 5938800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 989924/1666666 [1:39:20<53:50, 209.49it/s]

finished frames 5939400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 990030/1666666 [1:39:21<55:03, 204.84it/s]

finished frames 5940000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 990138/1666666 [1:39:21<53:45, 209.76it/s]

finished frames 5940600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 990223/1666666 [1:39:22<53:55, 209.05it/s]

finished frames 5941200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 990330/1666666 [1:39:22<53:41, 209.95it/s]

finished frames 5941800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 990437/1666666 [1:39:23<53:38, 210.09it/s]

finished frames 5942400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 990524/1666666 [1:39:23<53:35, 210.25it/s]

finished frames 5943000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 990634/1666666 [1:39:24<53:39, 210.00it/s]

finished frames 5943600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 990722/1666666 [1:39:24<53:28, 210.66it/s]

finished frames 5944200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 990831/1666666 [1:39:25<55:16, 203.79it/s]

finished frames 5944800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 990938/1666666 [1:39:25<56:08, 200.57it/s]

finished frames 5945400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 991022/1666666 [1:39:26<55:33, 202.66it/s]

finished frames 5946000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 991129/1666666 [1:39:26<53:57, 208.66it/s]

finished frames 5946600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 991237/1666666 [1:39:27<53:56, 208.71it/s]

finished frames 5947200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 991325/1666666 [1:39:27<52:47, 213.21it/s]

finished frames 5947800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 991435/1666666 [1:39:28<52:36, 213.89it/s]

finished frames 5948400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 991523/1666666 [1:39:28<52:35, 213.97it/s]

finished frames 5949000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 59%|█████▉    | 991633/1666666 [1:39:28<52:14, 215.36it/s]

finished frames 5949600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 991743/1666666 [1:39:29<52:13, 215.36it/s]

finished frames 5950200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 991831/1666666 [1:39:29<52:10, 215.55it/s]

finished frames 5950800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 991941/1666666 [1:39:30<52:04, 215.93it/s]

finished frames 5951400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 992029/1666666 [1:39:30<53:38, 209.63it/s]

finished frames 5952000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 992139/1666666 [1:39:31<52:22, 214.64it/s]

finished frames 5952600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 992227/1666666 [1:39:31<52:19, 214.82it/s]

finished frames 5953200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 992337/1666666 [1:39:32<52:10, 215.40it/s]

finished frames 5953800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 992425/1666666 [1:39:32<52:22, 214.55it/s]

finished frames 5954400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 992535/1666666 [1:39:33<52:25, 214.32it/s]

finished frames 5955000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 992623/1666666 [1:39:33<52:28, 214.09it/s]

finished frames 5955600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 992733/1666666 [1:39:34<52:22, 214.44it/s]

finished frames 5956200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 992843/1666666 [1:39:34<52:21, 214.48it/s]

finished frames 5956800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 992931/1666666 [1:39:35<52:20, 214.54it/s]

finished frames 5957400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 993041/1666666 [1:39:35<53:45, 208.83it/s]

finished frames 5958000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 993129/1666666 [1:39:35<52:46, 212.70it/s]

finished frames 5958600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 993238/1666666 [1:39:36<53:36, 209.36it/s]

finished frames 5959200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 993326/1666666 [1:39:36<55:08, 203.52it/s]

finished frames 5959800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 993435/1666666 [1:39:37<53:02, 211.55it/s]

finished frames 5960400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 993523/1666666 [1:39:37<52:45, 212.66it/s]

finished frames 5961000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 993633/1666666 [1:39:38<52:36, 213.23it/s]

finished frames 5961600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 993743/1666666 [1:39:38<52:32, 213.44it/s]

finished frames 5962200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 993831/1666666 [1:39:39<52:37, 213.11it/s]

finished frames 5962800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 993941/1666666 [1:39:39<52:33, 213.35it/s]

finished frames 5963400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 994028/1666666 [1:39:40<54:06, 207.20it/s]

finished frames 5964000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 994134/1666666 [1:39:40<53:43, 208.66it/s]

finished frames 5964600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 994241/1666666 [1:39:41<53:29, 209.51it/s]

finished frames 5965200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 994326/1666666 [1:39:41<53:38, 208.91it/s]

finished frames 5965800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 994433/1666666 [1:39:42<53:31, 209.33it/s]

finished frames 5966400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 994541/1666666 [1:39:42<53:15, 210.33it/s]

finished frames 5967000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 994629/1666666 [1:39:43<53:10, 210.64it/s]

finished frames 5967600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 994739/1666666 [1:39:43<53:09, 210.66it/s]

finished frames 5968200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 994827/1666666 [1:39:44<53:06, 210.82it/s]

finished frames 5968800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 994936/1666666 [1:39:44<53:07, 210.74it/s]

finished frames 5969400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 995022/1666666 [1:39:45<54:39, 204.79it/s]

finished frames 5970000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 995131/1666666 [1:39:45<53:10, 210.51it/s]

finished frames 5970600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 995241/1666666 [1:39:46<53:02, 210.98it/s]

finished frames 5971200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 995329/1666666 [1:39:46<52:57, 211.26it/s]

finished frames 5971800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 995436/1666666 [1:39:47<53:30, 209.09it/s]

finished frames 5972400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 995520/1666666 [1:39:47<56:52, 196.67it/s]

finished frames 5973000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 995624/1666666 [1:39:47<55:00, 203.32it/s]

finished frames 5973600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 995729/1666666 [1:39:48<54:22, 205.66it/s]

finished frames 5974200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 995835/1666666 [1:39:48<53:39, 208.38it/s]

finished frames 5974800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 995940/1666666 [1:39:49<54:08, 206.49it/s]

finished frames 5975400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 996024/1666666 [1:39:49<55:24, 201.70it/s]

finished frames 5976000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 996129/1666666 [1:39:50<54:17, 205.82it/s]

finished frames 5976600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 996234/1666666 [1:39:50<54:01, 206.84it/s]

finished frames 5977200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 996339/1666666 [1:39:51<53:51, 207.46it/s]

finished frames 5977800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 996423/1666666 [1:39:51<54:03, 206.66it/s]

finished frames 5978400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 996528/1666666 [1:39:52<54:06, 206.44it/s]

finished frames 5979000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 996633/1666666 [1:39:52<53:57, 206.97it/s]

finished frames 5979600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 996738/1666666 [1:39:53<54:01, 206.66it/s]

finished frames 5980200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 996822/1666666 [1:39:53<54:29, 204.89it/s]

finished frames 5980800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 996932/1666666 [1:39:54<52:27, 212.79it/s]

finished frames 5981400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 997039/1666666 [1:39:54<54:17, 205.56it/s]

finished frames 5982000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 997124/1666666 [1:39:55<53:49, 207.31it/s]

finished frames 5982600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 997232/1666666 [1:39:55<52:47, 211.33it/s]

finished frames 5983200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 997343/1666666 [1:39:56<51:33, 216.37it/s]

finished frames 5983800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 997431/1666666 [1:39:56<51:17, 217.49it/s]

finished frames 5984400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 997542/1666666 [1:39:57<51:07, 218.10it/s]

finished frames 5985000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 997630/1666666 [1:39:57<51:51, 215.00it/s]

finished frames 5985600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 997740/1666666 [1:39:58<52:35, 211.96it/s]

finished frames 5986200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 997828/1666666 [1:39:58<53:01, 210.24it/s]

finished frames 5986800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 997938/1666666 [1:39:59<52:11, 213.58it/s]

finished frames 5987400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 998026/1666666 [1:39:59<53:15, 209.25it/s]

finished frames 5988000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 998137/1666666 [1:39:59<51:44, 215.36it/s]

finished frames 5988600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 998225/1666666 [1:40:00<52:06, 213.78it/s]

finished frames 5989200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 998335/1666666 [1:40:00<52:31, 212.08it/s]

finished frames 5989800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 998444/1666666 [1:40:01<52:10, 213.46it/s]

finished frames 5990400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 998532/1666666 [1:40:01<51:51, 214.71it/s]

finished frames 5991000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 998642/1666666 [1:40:02<51:56, 214.34it/s]

finished frames 5991600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 998730/1666666 [1:40:02<51:54, 214.43it/s]

finished frames 5992200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 998840/1666666 [1:40:03<52:08, 213.50it/s]

finished frames 5992800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 998928/1666666 [1:40:03<52:00, 213.97it/s]

finished frames 5993400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 999038/1666666 [1:40:04<53:06, 209.55it/s]

finished frames 5994000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 999126/1666666 [1:40:04<52:20, 212.57it/s]

finished frames 5994600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 999236/1666666 [1:40:05<51:45, 214.89it/s]

finished frames 5995200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 999324/1666666 [1:40:05<51:44, 214.99it/s]

finished frames 5995800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 999434/1666666 [1:40:06<51:38, 215.35it/s]

finished frames 5996400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 999544/1666666 [1:40:06<51:32, 215.71it/s]

finished frames 5997000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 999632/1666666 [1:40:06<51:35, 215.51it/s]

finished frames 5997600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 999742/1666666 [1:40:07<51:33, 215.60it/s]

finished frames 5998200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 999830/1666666 [1:40:07<51:33, 215.54it/s]

finished frames 5998800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|█████▉    | 999940/1666666 [1:40:08<51:34, 215.44it/s]

finished frames 5999400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1000028/1666666 [1:40:08<52:44, 210.68it/s]

finished frames 6000000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1000138/1666666 [1:40:09<51:46, 214.59it/s]

finished frames 6000600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1000226/1666666 [1:40:09<51:39, 215.01it/s]

finished frames 6001200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1000336/1666666 [1:40:10<54:40, 203.12it/s]  

finished frames 6001800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1000424/1666666 [1:40:10<54:36, 203.36it/s]

finished frames 6002400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1000534/1666666 [1:40:11<51:53, 213.96it/s]

finished frames 6003000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1000644/1666666 [1:40:11<51:30, 215.47it/s]

finished frames 6003600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1000732/1666666 [1:40:12<51:28, 215.60it/s]

finished frames 6004200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1000842/1666666 [1:40:12<51:20, 216.17it/s]

finished frames 6004800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1000930/1666666 [1:40:13<51:40, 214.75it/s]

finished frames 6005400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1001040/1666666 [1:40:13<52:55, 209.62it/s]

finished frames 6006000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1001128/1666666 [1:40:14<54:05, 205.08it/s]

finished frames 6006600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1001236/1666666 [1:40:14<52:46, 210.14it/s]

finished frames 6007200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1001324/1666666 [1:40:14<52:44, 210.28it/s]

finished frames 6007800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1001433/1666666 [1:40:15<52:36, 210.77it/s]

finished frames 6008400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1001543/1666666 [1:40:16<52:29, 211.17it/s]

finished frames 6009000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1001630/1666666 [1:40:16<52:57, 209.29it/s]

finished frames 6009600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1001735/1666666 [1:40:16<53:20, 207.73it/s]

finished frames 6010200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1001840/1666666 [1:40:17<53:22, 207.62it/s]

finished frames 6010800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1001924/1666666 [1:40:17<53:23, 207.50it/s]

finished frames 6011400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1002029/1666666 [1:40:18<54:34, 202.94it/s]

finished frames 6012000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1002134/1666666 [1:40:18<53:33, 206.79it/s]

finished frames 6012600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1002239/1666666 [1:40:19<53:21, 207.54it/s]

finished frames 6013200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1002323/1666666 [1:40:19<53:21, 207.52it/s]

finished frames 6013800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1002428/1666666 [1:40:20<53:29, 206.95it/s]

finished frames 6014400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1002533/1666666 [1:40:20<53:28, 207.02it/s]

finished frames 6015000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1002617/1666666 [1:40:21<1:02:42, 176.51it/s]

finished frames 6015600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1002722/1666666 [1:40:21<54:58, 201.26it/s]  

finished frames 6016200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1002827/1666666 [1:40:22<53:28, 206.92it/s]

finished frames 6016800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1002933/1666666 [1:40:22<53:17, 207.55it/s]

finished frames 6017400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1003038/1666666 [1:40:23<55:20, 199.87it/s]

finished frames 6018000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1003122/1666666 [1:40:23<54:03, 204.57it/s]

finished frames 6018600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1003227/1666666 [1:40:24<53:35, 206.34it/s]

finished frames 6019200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1003332/1666666 [1:40:24<53:17, 207.43it/s]

finished frames 6019800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1003437/1666666 [1:40:25<53:14, 207.62it/s]

finished frames 6020400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1003542/1666666 [1:40:25<53:27, 206.71it/s]

finished frames 6021000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1003626/1666666 [1:40:26<53:35, 206.18it/s]

finished frames 6021600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1003731/1666666 [1:40:26<53:19, 207.18it/s]

finished frames 6022200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1003837/1666666 [1:40:27<52:59, 208.46it/s]

finished frames 6022800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1003942/1666666 [1:40:27<53:05, 208.03it/s]

finished frames 6023400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1004026/1666666 [1:40:28<54:26, 202.84it/s]

finished frames 6024000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1004131/1666666 [1:40:28<53:16, 207.25it/s]

finished frames 6024600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1004236/1666666 [1:40:29<53:19, 207.04it/s]

finished frames 6025200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1004341/1666666 [1:40:29<53:17, 207.14it/s]

finished frames 6025800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1004425/1666666 [1:40:30<53:32, 206.13it/s]

finished frames 6026400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1004530/1666666 [1:40:30<53:28, 206.40it/s]

finished frames 6027000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1004635/1666666 [1:40:31<53:30, 206.24it/s]

finished frames 6027600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1004740/1666666 [1:40:31<53:23, 206.63it/s]

finished frames 6028200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1004824/1666666 [1:40:31<53:39, 205.56it/s]

finished frames 6028800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1004929/1666666 [1:40:32<1:02:47, 175.66it/s]

finished frames 6029400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1005033/1666666 [1:40:33<1:00:56, 180.97it/s]

finished frames 6030000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1005138/1666666 [1:40:33<54:47, 201.25it/s]  

finished frames 6030600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1005222/1666666 [1:40:34<53:47, 204.91it/s]

finished frames 6031200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1005327/1666666 [1:40:34<52:59, 208.02it/s]

finished frames 6031800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1005432/1666666 [1:40:35<53:09, 207.29it/s]

finished frames 6032400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1005537/1666666 [1:40:35<53:05, 207.56it/s]

finished frames 6033000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1005642/1666666 [1:40:36<53:04, 207.55it/s]

finished frames 6033600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1005726/1666666 [1:40:36<53:08, 207.29it/s]

finished frames 6034200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1005831/1666666 [1:40:36<53:09, 207.21it/s]

finished frames 6034800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1005936/1666666 [1:40:37<53:09, 207.19it/s]

finished frames 6035400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1006020/1666666 [1:40:37<54:40, 201.38it/s]

finished frames 6036000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1006125/1666666 [1:40:38<53:19, 206.46it/s]

finished frames 6036600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1006230/1666666 [1:40:38<53:20, 206.35it/s]

finished frames 6037200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1006335/1666666 [1:40:39<53:02, 207.46it/s]

finished frames 6037800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1006440/1666666 [1:40:39<53:08, 207.08it/s]

finished frames 6038400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1006525/1666666 [1:40:40<53:00, 207.56it/s]

finished frames 6039000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1006633/1666666 [1:40:40<52:07, 211.04it/s]

finished frames 6039600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1006743/1666666 [1:40:41<51:47, 212.39it/s]

finished frames 6040200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1006831/1666666 [1:40:41<51:34, 213.21it/s]

finished frames 6040800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1006941/1666666 [1:40:42<51:27, 213.68it/s]

finished frames 6041400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1007029/1666666 [1:40:42<52:42, 208.58it/s]

finished frames 6042000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1007139/1666666 [1:40:43<51:40, 212.75it/s]

finished frames 6042600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1007227/1666666 [1:40:43<51:30, 213.40it/s]

finished frames 6043200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1007337/1666666 [1:40:44<52:59, 207.35it/s]

finished frames 6043800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1007424/1666666 [1:40:44<56:29, 194.51it/s]  

finished frames 6044400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1007534/1666666 [1:40:45<52:26, 209.50it/s]

finished frames 6045000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1007644/1666666 [1:40:45<51:35, 212.93it/s]

finished frames 6045600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1007732/1666666 [1:40:46<51:31, 213.17it/s]

finished frames 6046200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1007842/1666666 [1:40:46<51:27, 213.40it/s]

finished frames 6046800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1007930/1666666 [1:40:47<51:16, 214.11it/s]

finished frames 6047400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1008040/1666666 [1:40:47<52:40, 208.43it/s]

finished frames 6048000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1008127/1666666 [1:40:47<51:47, 211.94it/s]

finished frames 6048600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1008237/1666666 [1:40:48<51:35, 212.71it/s]

finished frames 6049200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 60%|██████    | 1008325/1666666 [1:40:48<51:30, 213.02it/s]

finished frames 6049800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1008435/1666666 [1:40:49<52:05, 210.57it/s]

finished frames 6050400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1008522/1666666 [1:40:49<52:26, 209.16it/s]

finished frames 6051000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1008629/1666666 [1:40:50<52:21, 209.47it/s]

finished frames 6051600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1008737/1666666 [1:40:50<52:20, 209.49it/s]

finished frames 6052200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1008823/1666666 [1:40:51<52:21, 209.42it/s]

finished frames 6052800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1008929/1666666 [1:40:51<52:30, 208.78it/s]

finished frames 6053400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1009034/1666666 [1:40:52<53:43, 204.04it/s]

finished frames 6054000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1009140/1666666 [1:40:52<52:40, 208.02it/s]

finished frames 6054600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1009224/1666666 [1:40:53<52:42, 207.90it/s]

finished frames 6055200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1009329/1666666 [1:40:53<52:34, 208.37it/s]

finished frames 6055800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1009437/1666666 [1:40:54<52:15, 209.59it/s]

finished frames 6056400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1009523/1666666 [1:40:54<52:07, 210.14it/s]

finished frames 6057000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1009629/1666666 [1:40:55<53:25, 204.98it/s]

finished frames 6057600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1009715/1666666 [1:40:55<52:32, 208.38it/s]

finished frames 6058200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1009842/1666666 [1:40:56<53:16, 205.51it/s]

finished frames 6058800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1009928/1666666 [1:40:56<52:29, 208.50it/s]

finished frames 6059400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1010035/1666666 [1:40:57<53:28, 204.63it/s]

finished frames 6060000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1010142/1666666 [1:40:57<52:20, 209.06it/s]

finished frames 6060600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1010228/1666666 [1:40:58<52:05, 210.06it/s]

finished frames 6061200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1010338/1666666 [1:40:58<51:56, 210.59it/s]

finished frames 6061800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1010425/1666666 [1:40:59<51:53, 210.78it/s]

finished frames 6062400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1010535/1666666 [1:40:59<51:59, 210.33it/s]

finished frames 6063000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1010623/1666666 [1:40:59<51:51, 210.82it/s]

finished frames 6063600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1010733/1666666 [1:41:00<51:39, 211.64it/s]

finished frames 6064200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1010843/1666666 [1:41:00<51:37, 211.76it/s]

finished frames 6064800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1010931/1666666 [1:41:01<51:41, 211.42it/s]

finished frames 6065400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1011041/1666666 [1:41:01<52:52, 206.68it/s]

finished frames 6066000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1011128/1666666 [1:41:02<51:58, 210.19it/s]

finished frames 6066600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1011238/1666666 [1:41:02<51:46, 210.97it/s]

finished frames 6067200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1011326/1666666 [1:41:03<51:50, 210.70it/s]

finished frames 6067800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1011436/1666666 [1:41:03<51:43, 211.15it/s]

finished frames 6068400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1011524/1666666 [1:41:04<51:32, 211.86it/s]

finished frames 6069000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1011634/1666666 [1:41:04<51:35, 211.59it/s]

finished frames 6069600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1011722/1666666 [1:41:05<51:50, 210.57it/s]

finished frames 6070200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1011832/1666666 [1:41:05<51:47, 210.75it/s]

finished frames 6070800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1011942/1666666 [1:41:06<51:52, 210.32it/s]

finished frames 6071400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1012027/1666666 [1:41:06<55:02, 198.23it/s]

finished frames 6072000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1012134/1666666 [1:41:07<54:35, 199.82it/s]

finished frames 6072600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1012242/1666666 [1:41:07<52:08, 209.15it/s]

finished frames 6073200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1012328/1666666 [1:41:08<51:49, 210.45it/s]

finished frames 6073800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1012436/1666666 [1:41:08<51:51, 210.27it/s]

finished frames 6074400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1012523/1666666 [1:41:09<51:46, 210.58it/s]

finished frames 6075000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1012632/1666666 [1:41:09<51:47, 210.44it/s]

finished frames 6075600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1012742/1666666 [1:41:10<51:40, 210.90it/s]

finished frames 6076200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1012830/1666666 [1:41:10<51:34, 211.26it/s]

finished frames 6076800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1012940/1666666 [1:41:11<51:34, 211.26it/s]

finished frames 6077400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1013027/1666666 [1:41:11<52:39, 206.88it/s]

finished frames 6078000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1013136/1666666 [1:41:11<51:46, 210.40it/s]

finished frames 6078600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1013223/1666666 [1:41:12<51:39, 210.81it/s]

finished frames 6079200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1013333/1666666 [1:41:12<51:33, 211.22it/s]

finished frames 6079800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1013443/1666666 [1:41:13<51:32, 211.25it/s]

finished frames 6080400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1013529/1666666 [1:41:13<52:01, 209.22it/s]

finished frames 6081000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1013635/1666666 [1:41:14<52:07, 208.83it/s]

finished frames 6081600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1013740/1666666 [1:41:14<52:17, 208.08it/s]

finished frames 6082200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1013824/1666666 [1:41:15<52:23, 207.67it/s]

finished frames 6082800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1013929/1666666 [1:41:15<52:35, 206.84it/s]

finished frames 6083400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1014034/1666666 [1:41:16<53:31, 203.23it/s]

finished frames 6084000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1014139/1666666 [1:41:16<52:24, 207.49it/s]

finished frames 6084600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1014224/1666666 [1:41:17<52:16, 207.99it/s]

finished frames 6085200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1014329/1666666 [1:41:17<52:17, 207.93it/s]

finished frames 6085800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1014434/1666666 [1:41:18<52:20, 207.67it/s]

finished frames 6086400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1014539/1666666 [1:41:18<52:07, 208.50it/s]

finished frames 6087000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1014623/1666666 [1:41:19<52:15, 207.96it/s]

finished frames 6087600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1014729/1666666 [1:41:19<52:01, 208.84it/s]

finished frames 6088200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1014834/1666666 [1:41:20<52:18, 207.70it/s]

finished frames 6088800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1014939/1666666 [1:41:20<52:08, 208.30it/s]

finished frames 6089400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1015023/1666666 [1:41:21<53:32, 202.83it/s]

finished frames 6090000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1015128/1666666 [1:41:21<52:43, 205.96it/s]

finished frames 6090600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1015234/1666666 [1:41:22<52:16, 207.69it/s]

finished frames 6091200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1015339/1666666 [1:41:22<52:01, 208.67it/s]

finished frames 6091800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1015423/1666666 [1:41:22<52:11, 207.95it/s]

finished frames 6092400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1015529/1666666 [1:41:23<52:06, 208.25it/s]

finished frames 6093000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1015634/1666666 [1:41:24<52:05, 208.32it/s]

finished frames 6093600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1015740/1666666 [1:41:24<52:01, 208.52it/s]

finished frames 6094200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1015825/1666666 [1:41:24<52:06, 208.20it/s]

finished frames 6094800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1015930/1666666 [1:41:25<52:12, 207.76it/s]

finished frames 6095400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1016035/1666666 [1:41:25<53:22, 203.13it/s]

finished frames 6096000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1016140/1666666 [1:41:26<52:23, 206.91it/s]

finished frames 6096600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1016224/1666666 [1:41:26<52:17, 207.32it/s]

finished frames 6097200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1016329/1666666 [1:41:27<52:17, 207.28it/s]

finished frames 6097800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1016434/1666666 [1:41:27<52:06, 207.94it/s]

finished frames 6098400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1016539/1666666 [1:41:28<52:08, 207.83it/s]

finished frames 6099000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1016623/1666666 [1:41:28<52:13, 207.44it/s]

finished frames 6099600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1016728/1666666 [1:41:29<52:02, 208.12it/s]

finished frames 6100200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1016833/1666666 [1:41:29<54:18, 199.44it/s]

finished frames 6100800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1016938/1666666 [1:41:30<52:26, 206.51it/s]

finished frames 6101400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1017022/1666666 [1:41:30<53:27, 202.54it/s]

finished frames 6102000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1017127/1666666 [1:41:31<52:24, 206.57it/s]

finished frames 6102600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1017232/1666666 [1:41:31<52:13, 207.27it/s]

finished frames 6103200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1017337/1666666 [1:41:32<52:05, 207.73it/s]

finished frames 6103800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1017442/1666666 [1:41:32<52:00, 208.02it/s]

finished frames 6104400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1017526/1666666 [1:41:33<52:01, 207.94it/s]

finished frames 6105000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1017631/1666666 [1:41:33<52:26, 206.27it/s]

finished frames 6105600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1017736/1666666 [1:41:34<52:12, 207.16it/s]

finished frames 6106200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1017841/1666666 [1:41:34<52:19, 206.63it/s]

finished frames 6106800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1017925/1666666 [1:41:35<52:08, 207.37it/s]

finished frames 6107400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1018030/1666666 [1:41:35<53:19, 202.75it/s]

finished frames 6108000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1018135/1666666 [1:41:36<52:28, 205.99it/s]

finished frames 6108600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1018240/1666666 [1:41:36<52:08, 207.27it/s]

finished frames 6109200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1018324/1666666 [1:41:37<52:16, 206.74it/s]

finished frames 6109800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1018429/1666666 [1:41:37<52:01, 207.64it/s]

finished frames 6110400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1018534/1666666 [1:41:38<52:35, 205.37it/s]

finished frames 6111000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1018639/1666666 [1:41:38<52:17, 206.55it/s]

finished frames 6111600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1018723/1666666 [1:41:38<52:20, 206.31it/s]

finished frames 6112200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1018829/1666666 [1:41:39<52:00, 207.63it/s]

finished frames 6112800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1018935/1666666 [1:41:40<51:45, 208.61it/s]

finished frames 6113400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1019039/1666666 [1:41:40<56:07, 192.29it/s]  

finished frames 6114000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1019124/1666666 [1:41:41<55:35, 194.16it/s]

finished frames 6114600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1019233/1666666 [1:41:41<51:23, 209.94it/s]

finished frames 6115200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1019343/1666666 [1:41:42<50:44, 212.60it/s]

finished frames 6115800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1019431/1666666 [1:41:42<50:50, 212.15it/s]

finished frames 6116400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1019541/1666666 [1:41:42<50:50, 212.17it/s]

finished frames 6117000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1019629/1666666 [1:41:43<50:47, 212.31it/s]

finished frames 6117600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1019739/1666666 [1:41:43<50:30, 213.47it/s]

finished frames 6118200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1019827/1666666 [1:41:44<50:44, 212.46it/s]

finished frames 6118800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1019937/1666666 [1:41:44<50:36, 213.00it/s]

finished frames 6119400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1020024/1666666 [1:41:45<51:56, 207.50it/s]

finished frames 6120000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1020133/1666666 [1:41:45<51:01, 211.17it/s]

finished frames 6120600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1020243/1666666 [1:41:46<50:39, 212.66it/s]

finished frames 6121200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1020331/1666666 [1:41:46<50:37, 212.75it/s]

finished frames 6121800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1020441/1666666 [1:41:47<50:49, 211.93it/s]

finished frames 6122400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1020529/1666666 [1:41:47<50:32, 213.10it/s]

finished frames 6123000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1020639/1666666 [1:41:48<50:26, 213.47it/s]

finished frames 6123600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████    | 1020727/1666666 [1:41:48<50:28, 213.26it/s]

finished frames 6124200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████▏   | 1020837/1666666 [1:41:49<50:30, 213.11it/s]

finished frames 6124800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████▏   | 1020925/1666666 [1:41:49<50:37, 212.59it/s]

finished frames 6125400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████▏   | 1021035/1666666 [1:41:50<51:46, 207.85it/s]

finished frames 6126000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████▏   | 1021122/1666666 [1:41:50<50:56, 211.21it/s]

finished frames 6126600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████▏   | 1021232/1666666 [1:41:50<50:38, 212.43it/s]

finished frames 6127200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████▏   | 1021320/1666666 [1:41:51<50:45, 211.92it/s]

finished frames 6127800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████▏   | 1021430/1666666 [1:41:51<51:32, 208.65it/s]

finished frames 6128400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████▏   | 1021540/1666666 [1:41:52<52:19, 205.48it/s]

finished frames 6129000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████▏   | 1021628/1666666 [1:41:52<50:52, 211.34it/s]

finished frames 6129600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████▏   | 1021738/1666666 [1:41:53<50:49, 211.47it/s]

finished frames 6130200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████▏   | 1021825/1666666 [1:41:53<51:44, 207.70it/s]

finished frames 6130800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████▏   | 1021938/1666666 [1:41:54<49:09, 218.58it/s]

finished frames 6131400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████▏   | 1022026/1666666 [1:41:54<51:02, 210.48it/s]

finished frames 6132000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████▏   | 1022136/1666666 [1:41:55<51:17, 209.47it/s]

finished frames 6132600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████▏   | 1022224/1666666 [1:41:55<50:39, 212.00it/s]

finished frames 6133200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████▏   | 1022334/1666666 [1:41:56<49:26, 217.18it/s]

finished frames 6133800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████▏   | 1022423/1666666 [1:41:56<49:04, 218.76it/s]

finished frames 6134400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████▏   | 1022533/1666666 [1:41:57<49:26, 217.17it/s]

finished frames 6135000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████▏   | 1022643/1666666 [1:41:57<49:50, 215.33it/s]

finished frames 6135600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████▏   | 1022733/1666666 [1:41:58<49:48, 215.50it/s]

finished frames 6136200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████▏   | 1022843/1666666 [1:41:58<50:33, 212.26it/s]

finished frames 6136800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████▏   | 1022933/1666666 [1:41:58<49:38, 216.14it/s]

finished frames 6137400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████▏   | 1023021/1666666 [1:41:59<51:21, 208.86it/s]

finished frames 6138000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████▏   | 1023131/1666666 [1:41:59<50:49, 211.01it/s]

finished frames 6138600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████▏   | 1023241/1666666 [1:42:00<50:18, 213.18it/s]

finished frames 6139200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████▏   | 1023329/1666666 [1:42:00<50:23, 212.78it/s]

finished frames 6139800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████▏   | 1023437/1666666 [1:42:01<51:40, 207.49it/s]

finished frames 6140400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████▏   | 1023524/1666666 [1:42:01<50:33, 212.03it/s]

finished frames 6141000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████▏   | 1023634/1666666 [1:42:02<50:15, 213.24it/s]

finished frames 6141600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████▏   | 1023722/1666666 [1:42:02<50:08, 213.73it/s]

finished frames 6142200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████▏   | 1023832/1666666 [1:42:03<51:03, 209.84it/s]

finished frames 6142800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████▏   | 1023941/1666666 [1:42:03<51:12, 209.18it/s]

finished frames 6143400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████▏   | 1024028/1666666 [1:42:04<51:55, 206.30it/s]

finished frames 6144000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████▏   | 1024136/1666666 [1:42:04<50:40, 211.35it/s]

finished frames 6144600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████▏   | 1024224/1666666 [1:42:05<50:15, 213.02it/s]

finished frames 6145200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████▏   | 1024334/1666666 [1:42:05<50:00, 214.05it/s]

finished frames 6145800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████▏   | 1024444/1666666 [1:42:06<49:52, 214.63it/s]

finished frames 6146400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████▏   | 1024532/1666666 [1:42:06<49:48, 214.90it/s]

finished frames 6147000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████▏   | 1024642/1666666 [1:42:07<49:49, 214.77it/s]

finished frames 6147600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████▏   | 1024730/1666666 [1:42:07<50:03, 213.76it/s]

finished frames 6148200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████▏   | 1024840/1666666 [1:42:08<49:53, 214.42it/s]

finished frames 6148800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 61%|██████▏   | 1024928/1666666 [1:42:08<49:56, 214.16it/s]

finished frames 6149400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1025038/1666666 [1:42:08<50:58, 209.76it/s]

finished frames 6150000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1025126/1666666 [1:42:09<50:05, 213.49it/s]

finished frames 6150600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1025236/1666666 [1:42:09<50:34, 211.40it/s]

finished frames 6151200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1025323/1666666 [1:42:10<50:56, 209.86it/s]

finished frames 6151800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1025430/1666666 [1:42:10<50:56, 209.77it/s]

finished frames 6152400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1025535/1666666 [1:42:11<51:18, 208.24it/s]

finished frames 6153000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1025640/1666666 [1:42:11<51:19, 208.13it/s]

finished frames 6153600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1025724/1666666 [1:42:12<51:46, 206.30it/s]

finished frames 6154200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1025829/1666666 [1:42:12<51:43, 206.47it/s]

finished frames 6154800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1025934/1666666 [1:42:13<51:37, 206.86it/s]

finished frames 6155400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1026041/1666666 [1:42:13<51:43, 206.42it/s]

finished frames 6156000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1026128/1666666 [1:42:14<53:30, 199.53it/s]

finished frames 6156600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1026238/1666666 [1:42:14<50:33, 211.10it/s]

finished frames 6157200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1026326/1666666 [1:42:15<50:07, 212.89it/s]

finished frames 6157800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1026434/1666666 [1:42:15<51:02, 209.03it/s]

finished frames 6158400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1026539/1666666 [1:42:16<51:59, 205.18it/s]

finished frames 6159000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1026623/1666666 [1:42:16<52:22, 203.67it/s]

finished frames 6159600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1026728/1666666 [1:42:17<52:30, 203.15it/s]

finished frames 6160200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1026833/1666666 [1:42:17<52:20, 203.71it/s]

finished frames 6160800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1026938/1666666 [1:42:18<52:12, 204.24it/s]

finished frames 6161400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1027021/1666666 [1:42:18<53:50, 198.01it/s]

finished frames 6162000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1027123/1666666 [1:42:19<52:42, 202.25it/s]

finished frames 6162600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1027228/1666666 [1:42:19<52:26, 203.22it/s]

finished frames 6163200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1027333/1666666 [1:42:20<52:12, 204.10it/s]

finished frames 6163800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1027438/1666666 [1:42:20<52:06, 204.47it/s]

finished frames 6164400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1027522/1666666 [1:42:21<52:22, 203.36it/s]

finished frames 6165000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1027627/1666666 [1:42:21<52:25, 203.18it/s]

finished frames 6165600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1027732/1666666 [1:42:22<52:20, 203.42it/s]

finished frames 6166200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1027837/1666666 [1:42:22<52:12, 203.93it/s]

finished frames 6166800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1027942/1666666 [1:42:23<52:12, 203.89it/s]

finished frames 6167400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1028026/1666666 [1:42:23<52:59, 200.84it/s]

finished frames 6168000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1028131/1666666 [1:42:24<51:54, 205.03it/s]

finished frames 6168600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1028236/1666666 [1:42:24<51:43, 205.68it/s]

finished frames 6169200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1028343/1666666 [1:42:25<51:15, 207.56it/s]

finished frames 6169800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1028428/1666666 [1:42:25<54:04, 196.74it/s]

finished frames 6170400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1028534/1666666 [1:42:26<51:32, 206.33it/s]

finished frames 6171000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1028642/1666666 [1:42:26<50:46, 209.42it/s]

finished frames 6171600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1028728/1666666 [1:42:26<50:39, 209.85it/s]

finished frames 6172200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1028836/1666666 [1:42:27<50:50, 209.07it/s]

finished frames 6172800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1028942/1666666 [1:42:27<50:46, 209.34it/s]

finished frames 6173400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1029027/1666666 [1:42:28<51:41, 205.60it/s]

finished frames 6174000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1029136/1666666 [1:42:28<50:44, 209.38it/s]

finished frames 6174600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1029244/1666666 [1:42:29<49:54, 212.84it/s]

finished frames 6175200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1029332/1666666 [1:42:29<49:35, 214.17it/s]

finished frames 6175800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1029442/1666666 [1:42:30<49:51, 212.99it/s]

finished frames 6176400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1029528/1666666 [1:42:30<50:50, 208.86it/s]

finished frames 6177000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1029633/1666666 [1:42:31<50:59, 208.25it/s]

finished frames 6177600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1029738/1666666 [1:42:31<51:08, 207.55it/s]

finished frames 6178200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1029822/1666666 [1:42:32<51:21, 206.66it/s]

finished frames 6178800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1029927/1666666 [1:42:32<51:14, 207.13it/s]

finished frames 6179400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1030032/1666666 [1:42:33<52:28, 202.18it/s]

finished frames 6180000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1030137/1666666 [1:42:33<51:26, 206.24it/s]

finished frames 6180600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1030222/1666666 [1:42:34<51:25, 206.25it/s]

finished frames 6181200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1030327/1666666 [1:42:34<51:09, 207.30it/s]

finished frames 6181800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1030432/1666666 [1:42:35<50:56, 208.16it/s]

finished frames 6182400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1030537/1666666 [1:42:35<50:47, 208.74it/s]

finished frames 6183000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1030642/1666666 [1:42:36<50:51, 208.42it/s]

finished frames 6183600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1030726/1666666 [1:42:36<50:45, 208.84it/s]

finished frames 6184200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1030831/1666666 [1:42:37<51:52, 204.30it/s]

finished frames 6184800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1030936/1666666 [1:42:37<54:08, 195.68it/s]  

finished frames 6185400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1031020/1666666 [1:42:38<53:25, 198.32it/s]

finished frames 6186000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1031126/1666666 [1:42:38<51:14, 206.72it/s]

finished frames 6186600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1031231/1666666 [1:42:39<50:52, 208.17it/s]

finished frames 6187200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1031337/1666666 [1:42:39<50:44, 208.67it/s]

finished frames 6187800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1031442/1666666 [1:42:40<50:43, 208.70it/s]

finished frames 6188400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1031527/1666666 [1:42:40<50:44, 208.59it/s]

finished frames 6189000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1031632/1666666 [1:42:41<50:49, 208.25it/s]

finished frames 6189600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1031738/1666666 [1:42:41<50:46, 208.41it/s]

finished frames 6190200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1031823/1666666 [1:42:41<50:44, 208.54it/s]

finished frames 6190800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1031929/1666666 [1:42:42<50:40, 208.73it/s]

finished frames 6191400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1032034/1666666 [1:42:42<51:53, 203.83it/s]

finished frames 6192000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1032139/1666666 [1:42:43<51:03, 207.12it/s]

finished frames 6192600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1032223/1666666 [1:42:43<51:00, 207.28it/s]

finished frames 6193200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1032328/1666666 [1:42:44<50:50, 207.94it/s]

finished frames 6193800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1032434/1666666 [1:42:44<50:41, 208.50it/s]

finished frames 6194400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1032539/1666666 [1:42:45<50:38, 208.72it/s]

finished frames 6195000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1032623/1666666 [1:42:45<50:35, 208.84it/s]

finished frames 6195600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1032728/1666666 [1:42:46<50:42, 208.34it/s]

finished frames 6196200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1032833/1666666 [1:42:46<50:39, 208.52it/s]

finished frames 6196800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1032938/1666666 [1:42:47<50:53, 207.51it/s]

finished frames 6197400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1033022/1666666 [1:42:47<52:00, 203.09it/s]

finished frames 6198000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1033127/1666666 [1:42:48<53:19, 198.00it/s]

finished frames 6198600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1033234/1666666 [1:42:48<52:56, 199.43it/s]

finished frames 6199200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1033343/1666666 [1:42:49<50:21, 209.62it/s]

finished frames 6199800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1033431/1666666 [1:42:49<49:47, 211.98it/s]

finished frames 6200400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1033541/1666666 [1:42:50<49:47, 211.92it/s]

finished frames 6201000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1033629/1666666 [1:42:50<49:43, 212.16it/s]

finished frames 6201600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1033739/1666666 [1:42:51<49:31, 213.02it/s]

finished frames 6202200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1033827/1666666 [1:42:51<49:38, 212.45it/s]

finished frames 6202800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1033937/1666666 [1:42:52<49:45, 211.93it/s]

finished frames 6203400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1034024/1666666 [1:42:52<51:18, 205.52it/s]

finished frames 6204000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1034133/1666666 [1:42:53<49:55, 211.18it/s]

finished frames 6204600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1034243/1666666 [1:42:53<49:32, 212.72it/s]

finished frames 6205200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1034331/1666666 [1:42:53<49:39, 212.21it/s]

finished frames 6205800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1034441/1666666 [1:42:54<49:24, 213.29it/s]

finished frames 6206400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1034529/1666666 [1:42:54<49:31, 212.71it/s]

finished frames 6207000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1034639/1666666 [1:42:55<51:29, 204.56it/s]

finished frames 6207600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1034723/1666666 [1:42:55<51:03, 206.28it/s]

finished frames 6208200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1034828/1666666 [1:42:56<50:45, 207.48it/s]

finished frames 6208800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1034934/1666666 [1:42:56<50:29, 208.56it/s]

finished frames 6209400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1035040/1666666 [1:42:57<51:25, 204.74it/s]

finished frames 6210000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1035124/1666666 [1:42:57<50:51, 206.95it/s]

finished frames 6210600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1035229/1666666 [1:42:58<50:40, 207.66it/s]

finished frames 6211200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1035334/1666666 [1:42:58<50:44, 207.38it/s]

finished frames 6211800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1035419/1666666 [1:42:59<54:59, 191.30it/s]

finished frames 6212400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1035524/1666666 [1:42:59<51:17, 205.07it/s]

finished frames 6213000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1035629/1666666 [1:43:00<51:40, 203.53it/s]

finished frames 6213600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1035734/1666666 [1:43:00<50:48, 206.99it/s]

finished frames 6214200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1035839/1666666 [1:43:01<50:35, 207.82it/s]

finished frames 6214800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1035923/1666666 [1:43:01<50:36, 207.75it/s]

finished frames 6215400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1036028/1666666 [1:43:02<51:40, 203.39it/s]

finished frames 6216000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1036133/1666666 [1:43:02<50:40, 207.39it/s]

finished frames 6216600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1036238/1666666 [1:43:03<50:22, 208.57it/s]

finished frames 6217200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1036322/1666666 [1:43:03<50:42, 207.15it/s]

finished frames 6217800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1036427/1666666 [1:43:04<50:39, 207.33it/s]

finished frames 6218400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1036533/1666666 [1:43:04<50:26, 208.20it/s]

finished frames 6219000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1036638/1666666 [1:43:05<50:23, 208.41it/s]

finished frames 6219600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1036722/1666666 [1:43:05<50:23, 208.36it/s]

finished frames 6220200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1036829/1666666 [1:43:06<50:04, 209.61it/s]

finished frames 6220800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1036934/1666666 [1:43:06<50:30, 207.81it/s]

finished frames 6221400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1037039/1666666 [1:43:07<51:57, 201.96it/s]

finished frames 6222000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1037126/1666666 [1:43:07<49:54, 210.26it/s]

finished frames 6222600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1037236/1666666 [1:43:08<49:06, 213.62it/s]

finished frames 6223200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1037324/1666666 [1:43:08<49:12, 213.17it/s]

finished frames 6223800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1037434/1666666 [1:43:08<49:38, 211.24it/s]

finished frames 6224400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1037522/1666666 [1:43:09<49:47, 210.59it/s]

finished frames 6225000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1037632/1666666 [1:43:09<49:43, 210.82it/s]

finished frames 6225600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1037742/1666666 [1:43:10<49:32, 211.60it/s]

finished frames 6226200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1037830/1666666 [1:43:10<50:57, 205.68it/s]

finished frames 6226800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1037938/1666666 [1:43:11<52:05, 201.15it/s]

finished frames 6227400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1038022/1666666 [1:43:11<51:37, 202.92it/s]

finished frames 6228000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1038129/1666666 [1:43:12<50:16, 208.34it/s]

finished frames 6228600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1038239/1666666 [1:43:12<49:27, 211.78it/s]

finished frames 6229200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1038327/1666666 [1:43:13<49:11, 212.86it/s]

finished frames 6229800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1038437/1666666 [1:43:13<49:11, 212.88it/s]

finished frames 6230400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1038525/1666666 [1:43:14<49:12, 212.74it/s]

finished frames 6231000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1038635/1666666 [1:43:14<49:04, 213.30it/s]

finished frames 6231600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1038723/1666666 [1:43:15<49:09, 212.88it/s]

finished frames 6232200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1038833/1666666 [1:43:15<48:50, 214.24it/s]

finished frames 6232800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1038943/1666666 [1:43:16<48:48, 214.34it/s]

finished frames 6233400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1039031/1666666 [1:43:16<49:59, 209.28it/s]

finished frames 6234000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1039141/1666666 [1:43:17<48:57, 213.59it/s]

finished frames 6234600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1039229/1666666 [1:43:17<48:47, 214.34it/s]

finished frames 6235200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1039339/1666666 [1:43:18<48:47, 214.29it/s]

finished frames 6235800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1039427/1666666 [1:43:18<49:02, 213.15it/s]

finished frames 6236400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1039537/1666666 [1:43:18<48:55, 213.66it/s]

finished frames 6237000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1039625/1666666 [1:43:19<48:53, 213.79it/s]

finished frames 6237600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1039735/1666666 [1:43:19<48:42, 214.51it/s]

finished frames 6238200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1039823/1666666 [1:43:20<48:45, 214.29it/s]

finished frames 6238800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1039933/1666666 [1:43:20<48:40, 214.59it/s]

finished frames 6239400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1040021/1666666 [1:43:21<50:29, 206.88it/s]

finished frames 6240000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1040131/1666666 [1:43:21<49:01, 212.97it/s]

finished frames 6240600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1040241/1666666 [1:43:22<49:53, 209.28it/s]

finished frames 6241200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1040328/1666666 [1:43:22<50:36, 206.27it/s]

finished frames 6241800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1040438/1666666 [1:43:23<49:15, 211.88it/s]

finished frames 6242400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1040524/1666666 [1:43:23<49:52, 209.24it/s]

finished frames 6243000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1040629/1666666 [1:43:24<50:20, 207.25it/s]

finished frames 6243600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1040734/1666666 [1:43:24<50:14, 207.63it/s]

finished frames 6244200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1040839/1666666 [1:43:25<50:05, 208.22it/s]

finished frames 6244800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1040923/1666666 [1:43:25<50:18, 207.31it/s]

finished frames 6245400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1041028/1666666 [1:43:26<51:37, 202.00it/s]

finished frames 6246000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1041133/1666666 [1:43:26<50:29, 206.48it/s]

finished frames 6246600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1041238/1666666 [1:43:27<50:25, 206.73it/s]

finished frames 6247200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1041323/1666666 [1:43:27<50:10, 207.72it/s]

finished frames 6247800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1041428/1666666 [1:43:28<50:27, 206.52it/s]

finished frames 6248400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1041533/1666666 [1:43:28<50:30, 206.26it/s]

finished frames 6249000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 62%|██████▏   | 1041638/1666666 [1:43:29<50:22, 206.78it/s]

finished frames 6249600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1041722/1666666 [1:43:29<50:27, 206.40it/s]

finished frames 6250200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1041827/1666666 [1:43:29<50:29, 206.26it/s]

finished frames 6250800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1041932/1666666 [1:43:30<50:33, 205.93it/s]

finished frames 6251400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1042037/1666666 [1:43:31<51:35, 201.81it/s]

finished frames 6252000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1042142/1666666 [1:43:31<50:37, 205.60it/s]

finished frames 6252600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1042226/1666666 [1:43:31<50:36, 205.64it/s]

finished frames 6253200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1042332/1666666 [1:43:32<50:03, 207.86it/s]

finished frames 6253800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1042438/1666666 [1:43:32<49:46, 209.01it/s]

finished frames 6254400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1042524/1666666 [1:43:33<51:32, 201.86it/s]

finished frames 6255000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1042633/1666666 [1:43:33<51:37, 201.47it/s]

finished frames 6255600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1042742/1666666 [1:43:34<49:32, 209.91it/s]

finished frames 6256200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1042830/1666666 [1:43:34<49:12, 211.30it/s]

finished frames 6256800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1042940/1666666 [1:43:35<48:53, 212.64it/s]

finished frames 6257400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1043028/1666666 [1:43:35<49:57, 208.08it/s]

finished frames 6258000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1043137/1666666 [1:43:36<49:06, 211.62it/s]

finished frames 6258600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1043225/1666666 [1:43:36<48:51, 212.64it/s]

finished frames 6259200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1043335/1666666 [1:43:37<48:42, 213.28it/s]

finished frames 6259800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1043423/1666666 [1:43:37<48:52, 212.50it/s]

finished frames 6260400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1043533/1666666 [1:43:38<48:43, 213.16it/s]

finished frames 6261000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1043643/1666666 [1:43:38<48:42, 213.21it/s]

finished frames 6261600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1043731/1666666 [1:43:39<48:43, 213.08it/s]

finished frames 6262200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1043841/1666666 [1:43:39<48:31, 213.90it/s]

finished frames 6262800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1043929/1666666 [1:43:40<48:42, 213.06it/s]

finished frames 6263400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1044039/1666666 [1:43:40<49:38, 209.07it/s]

finished frames 6264000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1044126/1666666 [1:43:40<48:48, 212.54it/s]

finished frames 6264600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1044236/1666666 [1:43:41<48:31, 213.78it/s]

finished frames 6265200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1044324/1666666 [1:43:41<48:39, 213.14it/s]

finished frames 6265800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1044434/1666666 [1:43:42<48:44, 212.80it/s]

finished frames 6266400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1044544/1666666 [1:43:42<48:28, 213.87it/s]

finished frames 6267000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1044632/1666666 [1:43:43<48:30, 213.74it/s]

finished frames 6267600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1044742/1666666 [1:43:43<48:27, 213.87it/s]

finished frames 6268200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1044830/1666666 [1:43:44<48:28, 213.82it/s]

finished frames 6268800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1044940/1666666 [1:43:44<49:25, 209.68it/s]

finished frames 6269400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1045026/1666666 [1:43:45<51:37, 200.72it/s]

finished frames 6270000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1045136/1666666 [1:43:45<48:45, 212.48it/s]

finished frames 6270600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1045224/1666666 [1:43:46<48:18, 214.42it/s]

finished frames 6271200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1045334/1666666 [1:43:46<48:11, 214.90it/s]

finished frames 6271800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1045422/1666666 [1:43:47<48:22, 214.04it/s]

finished frames 6272400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1045532/1666666 [1:43:47<48:08, 215.04it/s]

finished frames 6273000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1045642/1666666 [1:43:48<48:07, 215.06it/s]

finished frames 6273600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1045730/1666666 [1:43:48<48:08, 214.96it/s]

finished frames 6274200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1045840/1666666 [1:43:49<48:09, 214.85it/s]

finished frames 6274800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1045928/1666666 [1:43:49<48:04, 215.17it/s]

finished frames 6275400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1046038/1666666 [1:43:50<48:59, 211.14it/s]

finished frames 6276000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1046126/1666666 [1:43:50<48:10, 214.70it/s]

finished frames 6276600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1046236/1666666 [1:43:50<48:03, 215.16it/s]

finished frames 6277200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1046324/1666666 [1:43:51<48:03, 215.16it/s]

finished frames 6277800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1046434/1666666 [1:43:51<47:52, 215.94it/s]

finished frames 6278400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1046544/1666666 [1:43:52<47:53, 215.79it/s]

finished frames 6279000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1046632/1666666 [1:43:52<47:52, 215.89it/s]

finished frames 6279600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1046742/1666666 [1:43:53<47:48, 216.10it/s]

finished frames 6280200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1046830/1666666 [1:43:53<48:31, 212.87it/s]

finished frames 6280800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1046940/1666666 [1:43:54<47:26, 217.74it/s]

finished frames 6281400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1047028/1666666 [1:43:54<48:41, 212.06it/s]

finished frames 6282000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1047138/1666666 [1:43:55<48:12, 214.16it/s]

finished frames 6282600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1047226/1666666 [1:43:55<48:27, 213.02it/s]

finished frames 6283200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1047338/1666666 [1:43:56<47:20, 218.00it/s]

finished frames 6283800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1047428/1666666 [1:43:56<47:00, 219.58it/s]

finished frames 6284400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1047538/1666666 [1:43:56<47:07, 218.98it/s]

finished frames 6285000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1047628/1666666 [1:43:57<47:01, 219.37it/s]

finished frames 6285600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1047740/1666666 [1:43:57<47:06, 218.95it/s]

finished frames 6286200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1047828/1666666 [1:43:58<48:02, 214.71it/s]

finished frames 6286800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1047939/1666666 [1:43:58<47:47, 215.81it/s]

finished frames 6287400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1048028/1666666 [1:43:59<49:01, 210.34it/s]

finished frames 6288000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1048138/1666666 [1:43:59<48:34, 212.20it/s]

finished frames 6288600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1048226/1666666 [1:44:00<48:40, 211.79it/s]

finished frames 6289200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1048336/1666666 [1:44:00<48:16, 213.49it/s]

finished frames 6289800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1048424/1666666 [1:44:01<48:37, 211.91it/s]

finished frames 6290400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1048533/1666666 [1:44:01<48:34, 212.09it/s]

finished frames 6291000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1048643/1666666 [1:44:02<48:08, 213.97it/s]

finished frames 6291600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1048731/1666666 [1:44:02<48:07, 213.97it/s]

finished frames 6292200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1048841/1666666 [1:44:03<48:14, 213.42it/s]

finished frames 6292800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1048929/1666666 [1:44:03<48:17, 213.22it/s]

finished frames 6293400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1049039/1666666 [1:44:04<49:20, 208.63it/s]

finished frames 6294000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1049126/1666666 [1:44:04<48:35, 211.83it/s]

finished frames 6294600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1049236/1666666 [1:44:04<48:10, 213.60it/s]

finished frames 6295200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1049324/1666666 [1:44:05<48:20, 212.81it/s]

finished frames 6295800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1049434/1666666 [1:44:05<48:26, 212.36it/s]

finished frames 6296400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1049544/1666666 [1:44:06<48:14, 213.23it/s]

finished frames 6297000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1049632/1666666 [1:44:06<48:25, 212.34it/s]

finished frames 6297600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1049742/1666666 [1:44:07<49:47, 206.51it/s]

finished frames 6298200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1049829/1666666 [1:44:07<50:54, 201.96it/s]

finished frames 6298800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1049938/1666666 [1:44:08<48:44, 210.85it/s]

finished frames 6299400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1050025/1666666 [1:44:08<49:45, 206.57it/s]

finished frames 6300000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1050134/1666666 [1:44:09<48:39, 211.19it/s]

finished frames 6300600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1050240/1666666 [1:44:09<49:17, 208.44it/s]

finished frames 6301200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1050324/1666666 [1:44:10<49:32, 207.37it/s]

finished frames 6301800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1050430/1666666 [1:44:10<49:24, 207.84it/s]

finished frames 6302400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1050537/1666666 [1:44:11<49:11, 208.74it/s]

finished frames 6303000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1050623/1666666 [1:44:11<48:43, 210.72it/s]

finished frames 6303600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1050733/1666666 [1:44:12<48:12, 212.92it/s]

finished frames 6304200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1050843/1666666 [1:44:12<48:00, 213.83it/s]

finished frames 6304800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1050931/1666666 [1:44:13<48:05, 213.39it/s]

finished frames 6305400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1051041/1666666 [1:44:13<49:00, 209.37it/s]

finished frames 6306000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1051129/1666666 [1:44:13<47:58, 213.80it/s]

finished frames 6306600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1051239/1666666 [1:44:14<47:57, 213.90it/s]

finished frames 6307200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1051327/1666666 [1:44:14<47:54, 214.10it/s]

finished frames 6307800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1051437/1666666 [1:44:15<47:59, 213.65it/s]

finished frames 6308400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1051525/1666666 [1:44:15<47:55, 213.92it/s]

finished frames 6309000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1051635/1666666 [1:44:16<48:05, 213.15it/s]

finished frames 6309600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1051723/1666666 [1:44:16<48:13, 212.52it/s]

finished frames 6310200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1051833/1666666 [1:44:17<48:07, 212.90it/s]

finished frames 6310800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1051943/1666666 [1:44:17<48:03, 213.20it/s]

finished frames 6311400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1052031/1666666 [1:44:18<49:13, 208.13it/s]

finished frames 6312000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1052140/1666666 [1:44:18<48:17, 212.06it/s]

finished frames 6312600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1052224/1666666 [1:44:19<51:05, 200.45it/s]

finished frames 6313200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1052333/1666666 [1:44:19<48:36, 210.66it/s]

finished frames 6313800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1052443/1666666 [1:44:20<48:03, 213.00it/s]

finished frames 6314400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1052531/1666666 [1:44:20<47:55, 213.58it/s]

finished frames 6315000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1052641/1666666 [1:44:21<47:54, 213.60it/s]

finished frames 6315600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1052729/1666666 [1:44:21<48:00, 213.13it/s]

finished frames 6316200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1052839/1666666 [1:44:22<48:00, 213.11it/s]

finished frames 6316800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1052927/1666666 [1:44:22<47:58, 213.19it/s]

finished frames 6317400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1053037/1666666 [1:44:23<49:04, 208.37it/s]

finished frames 6318000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1053125/1666666 [1:44:23<48:24, 211.22it/s]

finished frames 6318600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1053235/1666666 [1:44:23<48:01, 212.91it/s]

finished frames 6319200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1053323/1666666 [1:44:24<47:55, 213.30it/s]

finished frames 6319800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1053433/1666666 [1:44:24<48:15, 211.78it/s]

finished frames 6320400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1053543/1666666 [1:44:25<47:48, 213.72it/s]

finished frames 6321000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1053631/1666666 [1:44:25<48:01, 212.76it/s]

finished frames 6321600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1053741/1666666 [1:44:26<48:04, 212.52it/s]

finished frames 6322200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1053829/1666666 [1:44:26<47:54, 213.19it/s]

finished frames 6322800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1053939/1666666 [1:44:27<47:58, 212.90it/s]

finished frames 6323400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1054027/1666666 [1:44:27<49:09, 207.72it/s]

finished frames 6324000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1054136/1666666 [1:44:28<48:07, 212.17it/s]

finished frames 6324600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1054224/1666666 [1:44:28<47:56, 212.88it/s]

finished frames 6325200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1054334/1666666 [1:44:29<47:50, 213.30it/s]

finished frames 6325800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1054422/1666666 [1:44:29<52:02, 196.09it/s]

finished frames 6326400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1054531/1666666 [1:44:30<48:41, 209.53it/s]

finished frames 6327000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1054640/1666666 [1:44:30<48:08, 211.86it/s]

finished frames 6327600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1054728/1666666 [1:44:31<47:56, 212.74it/s]

finished frames 6328200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1054838/1666666 [1:44:31<47:45, 213.50it/s]

finished frames 6328800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1054926/1666666 [1:44:31<47:53, 212.92it/s]

finished frames 6329400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1055036/1666666 [1:44:32<49:01, 207.90it/s]

finished frames 6330000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1055124/1666666 [1:44:32<48:18, 210.96it/s]

finished frames 6330600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1055234/1666666 [1:44:33<47:54, 212.72it/s]

finished frames 6331200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1055344/1666666 [1:44:33<47:48, 213.13it/s]

finished frames 6331800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1055432/1666666 [1:44:34<47:52, 212.82it/s]

finished frames 6332400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1055542/1666666 [1:44:34<47:50, 212.93it/s]

finished frames 6333000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1055630/1666666 [1:44:35<47:50, 212.85it/s]

finished frames 6333600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1055740/1666666 [1:44:35<47:40, 213.58it/s]

finished frames 6334200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1055828/1666666 [1:44:36<47:49, 212.87it/s]

finished frames 6334800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1055938/1666666 [1:44:36<47:46, 213.08it/s]

finished frames 6335400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1056025/1666666 [1:44:37<49:11, 206.91it/s]

finished frames 6336000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1056135/1666666 [1:44:37<48:02, 211.83it/s]

finished frames 6336600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1056223/1666666 [1:44:38<47:56, 212.18it/s]

finished frames 6337200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1056333/1666666 [1:44:38<47:23, 214.62it/s]

finished frames 6337800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1056443/1666666 [1:44:39<47:20, 214.86it/s]

finished frames 6338400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1056531/1666666 [1:44:39<47:17, 214.99it/s]

finished frames 6339000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1056641/1666666 [1:44:40<47:16, 215.03it/s]

finished frames 6339600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1056729/1666666 [1:44:40<47:21, 214.64it/s]

finished frames 6340200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1056839/1666666 [1:44:40<47:16, 214.99it/s]

finished frames 6340800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1056927/1666666 [1:44:41<47:15, 215.01it/s]

finished frames 6341400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1057037/1666666 [1:44:41<48:17, 210.38it/s]

finished frames 6342000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1057125/1666666 [1:44:42<47:34, 213.52it/s]

finished frames 6342600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1057235/1666666 [1:44:42<47:18, 214.68it/s]

finished frames 6343200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1057323/1666666 [1:44:43<47:21, 214.41it/s]

finished frames 6343800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1057433/1666666 [1:44:43<47:18, 214.63it/s]

finished frames 6344400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1057543/1666666 [1:44:44<47:16, 214.75it/s]

finished frames 6345000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1057631/1666666 [1:44:44<47:20, 214.42it/s]

finished frames 6345600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1057741/1666666 [1:44:45<47:19, 214.48it/s]

finished frames 6346200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1057829/1666666 [1:44:45<47:16, 214.66it/s]

finished frames 6346800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1057939/1666666 [1:44:46<47:19, 214.38it/s]

finished frames 6347400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1058027/1666666 [1:44:46<48:22, 209.72it/s]

finished frames 6348000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1058137/1666666 [1:44:47<47:25, 213.89it/s]

finished frames 6348600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 63%|██████▎   | 1058223/1666666 [1:44:47<48:08, 210.64it/s]

finished frames 6349200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▎   | 1058333/1666666 [1:44:47<48:03, 210.95it/s]

finished frames 6349800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▎   | 1058443/1666666 [1:44:48<48:11, 210.38it/s]

finished frames 6350400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▎   | 1058530/1666666 [1:44:48<48:13, 210.18it/s]

finished frames 6351000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▎   | 1058639/1666666 [1:44:49<48:18, 209.77it/s]

finished frames 6351600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▎   | 1058724/1666666 [1:44:49<48:16, 209.88it/s]

finished frames 6352200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▎   | 1058833/1666666 [1:44:50<48:10, 210.27it/s]

finished frames 6352800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▎   | 1058943/1666666 [1:44:50<48:00, 211.00it/s]

finished frames 6353400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▎   | 1059029/1666666 [1:44:51<49:21, 205.20it/s]

finished frames 6354000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▎   | 1059137/1666666 [1:44:51<48:25, 209.11it/s]

finished frames 6354600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▎   | 1059223/1666666 [1:44:52<51:26, 196.82it/s]

finished frames 6355200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▎   | 1059330/1666666 [1:44:52<48:46, 207.50it/s]

finished frames 6355800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▎   | 1059438/1666666 [1:44:53<48:12, 209.94it/s]

finished frames 6356400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▎   | 1059526/1666666 [1:44:53<48:11, 209.97it/s]

finished frames 6357000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▎   | 1059636/1666666 [1:44:54<48:06, 210.28it/s]

finished frames 6357600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▎   | 1059722/1666666 [1:44:54<48:36, 208.08it/s]

finished frames 6358200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▎   | 1059831/1666666 [1:44:55<48:08, 210.10it/s]

finished frames 6358800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▎   | 1059937/1666666 [1:44:55<48:36, 208.01it/s]

finished frames 6359400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▎   | 1060021/1666666 [1:44:56<50:02, 202.04it/s]

finished frames 6360000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▎   | 1060126/1666666 [1:44:56<48:58, 206.43it/s]

finished frames 6360600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▎   | 1060232/1666666 [1:44:57<48:27, 208.57it/s]

finished frames 6361200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▎   | 1060339/1666666 [1:44:57<48:18, 209.20it/s]

finished frames 6361800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▎   | 1060425/1666666 [1:44:58<48:19, 209.09it/s]

finished frames 6362400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▎   | 1060531/1666666 [1:44:58<48:31, 208.18it/s]

finished frames 6363000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▎   | 1060637/1666666 [1:44:59<48:26, 208.51it/s]

finished frames 6363600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▎   | 1060742/1666666 [1:44:59<48:29, 208.29it/s]

finished frames 6364200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▎   | 1060827/1666666 [1:44:59<48:26, 208.43it/s]

finished frames 6364800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▎   | 1060933/1666666 [1:45:00<48:27, 208.36it/s]

finished frames 6365400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▎   | 1061039/1666666 [1:45:00<49:11, 205.16it/s]

finished frames 6366000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▎   | 1061126/1666666 [1:45:01<47:51, 210.84it/s]

finished frames 6366600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▎   | 1061236/1666666 [1:45:01<47:27, 212.61it/s]

finished frames 6367200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▎   | 1061324/1666666 [1:45:02<47:36, 211.92it/s]

finished frames 6367800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▎   | 1061432/1666666 [1:45:02<48:22, 208.53it/s]

finished frames 6368400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▎   | 1061537/1666666 [1:45:03<48:41, 207.14it/s]

finished frames 6369000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▎   | 1061642/1666666 [1:45:03<49:39, 203.08it/s]

finished frames 6369600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▎   | 1061726/1666666 [1:45:04<49:06, 205.28it/s]

finished frames 6370200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▎   | 1061831/1666666 [1:45:04<49:15, 204.63it/s]

finished frames 6370800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▎   | 1061936/1666666 [1:45:05<49:09, 205.04it/s]

finished frames 6371400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▎   | 1062020/1666666 [1:45:05<50:28, 199.68it/s]

finished frames 6372000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▎   | 1062125/1666666 [1:45:06<48:57, 205.77it/s]

finished frames 6372600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▎   | 1062230/1666666 [1:45:06<48:39, 207.02it/s]

finished frames 6373200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▎   | 1062335/1666666 [1:45:07<48:30, 207.61it/s]

finished frames 6373800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▎   | 1062440/1666666 [1:45:07<48:21, 208.28it/s]

finished frames 6374400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1062524/1666666 [1:45:08<48:29, 207.62it/s]

finished frames 6375000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1062629/1666666 [1:45:08<48:34, 207.26it/s]

finished frames 6375600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1062734/1666666 [1:45:09<48:34, 207.25it/s]

finished frames 6376200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1062839/1666666 [1:45:09<48:27, 207.69it/s]

finished frames 6376800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1062923/1666666 [1:45:10<48:36, 207.01it/s]

finished frames 6377400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1063028/1666666 [1:45:10<49:39, 202.61it/s]

finished frames 6378000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1063133/1666666 [1:45:11<48:51, 205.90it/s]

finished frames 6378600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1063238/1666666 [1:45:11<48:27, 207.55it/s]

finished frames 6379200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1063322/1666666 [1:45:12<48:34, 207.00it/s]

finished frames 6379800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1063427/1666666 [1:45:12<48:33, 207.08it/s]

finished frames 6380400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1063532/1666666 [1:45:13<48:21, 207.87it/s]

finished frames 6381000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1063637/1666666 [1:45:13<48:25, 207.53it/s]

finished frames 6381600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1063742/1666666 [1:45:14<48:25, 207.52it/s]

finished frames 6382200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1063826/1666666 [1:45:14<48:19, 207.92it/s]

finished frames 6382800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1063931/1666666 [1:45:15<51:17, 195.85it/s]

finished frames 6383400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1064035/1666666 [1:45:15<51:27, 195.19it/s]

finished frames 6384000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1064140/1666666 [1:45:16<48:49, 205.66it/s]

finished frames 6384600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1064224/1666666 [1:45:16<48:26, 207.30it/s]

finished frames 6385200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1064329/1666666 [1:45:16<48:27, 207.14it/s]

finished frames 6385800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1064437/1666666 [1:45:17<47:31, 211.17it/s]

finished frames 6386400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1064525/1666666 [1:45:17<47:22, 211.82it/s]

finished frames 6387000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1064635/1666666 [1:45:18<46:59, 213.54it/s]

finished frames 6387600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1064723/1666666 [1:45:18<47:02, 213.24it/s]

finished frames 6388200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1064833/1666666 [1:45:19<47:40, 210.41it/s]

finished frames 6388800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1064943/1666666 [1:45:19<46:37, 215.08it/s]

finished frames 6389400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1065031/1666666 [1:45:20<47:49, 209.68it/s]

finished frames 6390000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1065141/1666666 [1:45:20<46:58, 213.45it/s]

finished frames 6390600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1065229/1666666 [1:45:21<46:51, 213.89it/s]

finished frames 6391200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1065339/1666666 [1:45:21<46:47, 214.22it/s]

finished frames 6391800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1065427/1666666 [1:45:22<46:47, 214.17it/s]

finished frames 6392400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1065537/1666666 [1:45:22<46:44, 214.36it/s]

finished frames 6393000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1065625/1666666 [1:45:23<46:55, 213.51it/s]

finished frames 6393600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1065735/1666666 [1:45:23<46:46, 214.14it/s]

finished frames 6394200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1065823/1666666 [1:45:24<46:37, 214.79it/s]

finished frames 6394800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1065933/1666666 [1:45:24<46:51, 213.69it/s]

finished frames 6395400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1066021/1666666 [1:45:24<48:20, 207.11it/s]

finished frames 6396000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1066131/1666666 [1:45:25<47:06, 212.47it/s]

finished frames 6396600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1066219/1666666 [1:45:25<46:54, 213.32it/s]

finished frames 6397200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1066328/1666666 [1:45:26<47:54, 208.82it/s]

finished frames 6397800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1066436/1666666 [1:45:26<48:16, 207.24it/s]

finished frames 6398400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1066523/1666666 [1:45:27<47:24, 210.96it/s]

finished frames 6399000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1066633/1666666 [1:45:27<47:11, 211.95it/s]

finished frames 6399600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1066743/1666666 [1:45:28<46:49, 213.52it/s]

finished frames 6400200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1066831/1666666 [1:45:28<46:40, 214.18it/s]

finished frames 6400800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1066941/1666666 [1:45:29<46:38, 214.26it/s]

finished frames 6401400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1067029/1666666 [1:45:29<48:04, 207.89it/s]

finished frames 6402000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1067138/1666666 [1:45:30<47:10, 211.83it/s]

finished frames 6402600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1067226/1666666 [1:45:30<47:12, 211.63it/s]

finished frames 6403200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1067336/1666666 [1:45:31<46:56, 212.76it/s]

finished frames 6403800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1067424/1666666 [1:45:31<46:56, 212.74it/s]

finished frames 6404400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1067534/1666666 [1:45:32<46:55, 212.79it/s]

finished frames 6405000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1067622/1666666 [1:45:32<46:59, 212.48it/s]

finished frames 6405600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1067732/1666666 [1:45:33<46:59, 212.42it/s]

finished frames 6406200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1067842/1666666 [1:45:33<47:01, 212.22it/s]

finished frames 6406800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1067930/1666666 [1:45:34<47:06, 211.84it/s]

finished frames 6407400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1068040/1666666 [1:45:34<47:56, 208.09it/s]

finished frames 6408000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1068127/1666666 [1:45:34<47:16, 211.01it/s]

finished frames 6408600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1068237/1666666 [1:45:35<46:53, 212.69it/s]

finished frames 6409200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1068325/1666666 [1:45:35<47:01, 212.07it/s]

finished frames 6409800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1068435/1666666 [1:45:36<47:01, 212.02it/s]

finished frames 6410400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1068523/1666666 [1:45:36<46:57, 212.30it/s]

finished frames 6411000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1068633/1666666 [1:45:37<49:48, 200.11it/s]

finished frames 6411600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1068742/1666666 [1:45:37<47:27, 209.96it/s]

finished frames 6412200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1068830/1666666 [1:45:38<47:15, 210.82it/s]

finished frames 6412800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1068940/1666666 [1:45:38<46:56, 212.26it/s]

finished frames 6413400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1069028/1666666 [1:45:39<47:54, 207.88it/s]

finished frames 6414000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1069137/1666666 [1:45:39<46:58, 212.02it/s]

finished frames 6414600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1069225/1666666 [1:45:40<46:46, 212.85it/s]

finished frames 6415200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1069335/1666666 [1:45:40<46:39, 213.36it/s]

finished frames 6415800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1069423/1666666 [1:45:41<46:39, 213.33it/s]

finished frames 6416400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1069533/1666666 [1:45:41<46:47, 212.69it/s]

finished frames 6417000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1069643/1666666 [1:45:42<46:38, 213.36it/s]

finished frames 6417600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1069731/1666666 [1:45:42<46:30, 213.89it/s]

finished frames 6418200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1069841/1666666 [1:45:43<47:12, 210.71it/s]

finished frames 6418800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1069926/1666666 [1:45:43<47:47, 208.10it/s]

finished frames 6419400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1070031/1666666 [1:45:44<49:16, 201.82it/s]

finished frames 6420000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1070136/1666666 [1:45:44<48:23, 205.44it/s]

finished frames 6420600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1070241/1666666 [1:45:45<48:06, 206.62it/s]

finished frames 6421200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1070327/1666666 [1:45:45<47:22, 209.82it/s]

finished frames 6421800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1070437/1666666 [1:45:45<47:02, 211.24it/s]

finished frames 6422400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1070525/1666666 [1:45:46<47:03, 211.11it/s]

finished frames 6423000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1070635/1666666 [1:45:46<46:54, 211.81it/s]

finished frames 6423600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1070723/1666666 [1:45:47<46:54, 211.73it/s]

finished frames 6424200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1070833/1666666 [1:45:47<46:43, 212.50it/s]

finished frames 6424800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1070943/1666666 [1:45:48<46:35, 213.12it/s]

finished frames 6425400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1071029/1666666 [1:45:48<49:32, 200.40it/s]

finished frames 6426000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1071138/1666666 [1:45:49<49:29, 200.58it/s]

finished frames 6426600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1071225/1666666 [1:45:49<47:30, 208.86it/s]

finished frames 6427200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1071331/1666666 [1:45:50<47:39, 208.18it/s]

finished frames 6427800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1071436/1666666 [1:45:50<47:39, 208.15it/s]

finished frames 6428400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1071542/1666666 [1:45:51<47:34, 208.52it/s]

finished frames 6429000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1071626/1666666 [1:45:51<47:46, 207.60it/s]

finished frames 6429600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1071731/1666666 [1:45:52<47:37, 208.22it/s]

finished frames 6430200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1071836/1666666 [1:45:52<47:43, 207.75it/s]

finished frames 6430800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1071941/1666666 [1:45:53<48:03, 206.27it/s]

finished frames 6431400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1072025/1666666 [1:45:53<49:19, 200.91it/s]

finished frames 6432000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1072134/1666666 [1:45:54<46:20, 213.80it/s]

finished frames 6432600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1072224/1666666 [1:45:54<46:21, 213.68it/s]

finished frames 6433200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1072336/1666666 [1:45:55<46:04, 214.96it/s]

finished frames 6433800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1072424/1666666 [1:45:55<46:53, 211.25it/s]

finished frames 6434400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1072535/1666666 [1:45:56<45:41, 216.75it/s]

finished frames 6435000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1072624/1666666 [1:45:56<45:16, 218.71it/s]

finished frames 6435600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1072734/1666666 [1:45:56<47:20, 209.12it/s]

finished frames 6436200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1072839/1666666 [1:45:57<49:03, 201.76it/s]

finished frames 6436800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1072921/1666666 [1:45:57<49:41, 199.15it/s]

finished frames 6437400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1073023/1666666 [1:45:58<51:44, 191.24it/s]

finished frames 6438000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1073123/1666666 [1:45:58<50:46, 194.84it/s]

finished frames 6438600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1073228/1666666 [1:45:59<49:07, 201.33it/s]

finished frames 6439200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1073336/1666666 [1:45:59<46:38, 212.02it/s]

finished frames 6439800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1073424/1666666 [1:46:00<45:51, 215.64it/s]

finished frames 6440400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1073534/1666666 [1:46:00<48:19, 204.55it/s]

finished frames 6441000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1073644/1666666 [1:46:01<46:42, 211.59it/s]

finished frames 6441600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1073731/1666666 [1:46:01<47:26, 208.27it/s]

finished frames 6442200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1073840/1666666 [1:46:02<46:35, 212.10it/s]

finished frames 6442800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1073928/1666666 [1:46:02<46:31, 212.36it/s]

finished frames 6443400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1074038/1666666 [1:46:03<47:18, 208.80it/s]

finished frames 6444000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1074125/1666666 [1:46:03<46:29, 212.38it/s]

finished frames 6444600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1074235/1666666 [1:46:04<46:17, 213.30it/s]

finished frames 6445200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1074323/1666666 [1:46:04<46:15, 213.38it/s]

finished frames 6445800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1074433/1666666 [1:46:05<46:17, 213.22it/s]

finished frames 6446400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1074543/1666666 [1:46:05<46:08, 213.91it/s]

finished frames 6447000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1074631/1666666 [1:46:06<46:07, 213.91it/s]

finished frames 6447600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1074741/1666666 [1:46:06<46:05, 214.06it/s]

finished frames 6448200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1074829/1666666 [1:46:07<46:11, 213.58it/s]

finished frames 6448800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 64%|██████▍   | 1074939/1666666 [1:46:07<46:18, 212.94it/s]

finished frames 6449400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1075027/1666666 [1:46:07<47:13, 208.77it/s]

finished frames 6450000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1075137/1666666 [1:46:08<46:24, 212.44it/s]

finished frames 6450600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1075225/1666666 [1:46:08<46:17, 212.92it/s]

finished frames 6451200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1075335/1666666 [1:46:09<46:10, 213.46it/s]

finished frames 6451800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1075423/1666666 [1:46:09<46:12, 213.27it/s]

finished frames 6452400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1075533/1666666 [1:46:10<46:04, 213.87it/s]

finished frames 6453000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1075643/1666666 [1:46:10<46:05, 213.74it/s]

finished frames 6453600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1075730/1666666 [1:46:11<48:31, 202.99it/s]

finished frames 6454200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1075836/1666666 [1:46:11<50:01, 196.86it/s]

finished frames 6454800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1075922/1666666 [1:46:12<47:40, 206.54it/s]

finished frames 6455400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1076030/1666666 [1:46:12<47:55, 205.39it/s]

finished frames 6456000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1076138/1666666 [1:46:13<46:46, 210.41it/s]

finished frames 6456600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1076226/1666666 [1:46:13<46:45, 210.45it/s]

finished frames 6457200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1076336/1666666 [1:46:14<46:41, 210.70it/s]

finished frames 6457800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1076423/1666666 [1:46:14<46:42, 210.61it/s]

finished frames 6458400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1076533/1666666 [1:46:15<46:35, 211.11it/s]

finished frames 6459000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1076643/1666666 [1:46:15<46:36, 211.00it/s]

finished frames 6459600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1076731/1666666 [1:46:16<46:38, 210.77it/s]

finished frames 6460200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1076841/1666666 [1:46:16<46:23, 211.89it/s]

finished frames 6460800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1076929/1666666 [1:46:17<46:28, 211.47it/s]

finished frames 6461400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1077038/1666666 [1:46:17<47:33, 206.66it/s]

finished frames 6462000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1077124/1666666 [1:46:17<46:52, 209.62it/s]

finished frames 6462600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1077232/1666666 [1:46:18<46:37, 210.70it/s]

finished frames 6463200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1077342/1666666 [1:46:19<46:34, 210.91it/s]

finished frames 6463800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1077430/1666666 [1:46:19<46:37, 210.67it/s]

finished frames 6464400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1077540/1666666 [1:46:19<46:30, 211.09it/s]

finished frames 6465000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1077628/1666666 [1:46:20<46:25, 211.46it/s]

finished frames 6465600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1077738/1666666 [1:46:20<46:24, 211.51it/s]

finished frames 6466200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1077826/1666666 [1:46:21<46:27, 211.22it/s]

finished frames 6466800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1077936/1666666 [1:46:21<46:21, 211.66it/s]

finished frames 6467400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1078023/1666666 [1:46:22<47:28, 206.67it/s]

finished frames 6468000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1078131/1666666 [1:46:22<47:31, 206.42it/s]

finished frames 6468600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1078239/1666666 [1:46:23<47:59, 204.38it/s]

finished frames 6469200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1078326/1666666 [1:46:23<46:45, 209.69it/s]

finished frames 6469800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1078436/1666666 [1:46:24<46:23, 211.35it/s]

finished frames 6470400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1078524/1666666 [1:46:24<46:30, 210.76it/s]

finished frames 6471000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1078634/1666666 [1:46:25<46:57, 208.69it/s]

finished frames 6471600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1078740/1666666 [1:46:25<46:49, 209.23it/s]

finished frames 6472200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1078825/1666666 [1:46:26<46:52, 209.01it/s]

finished frames 6472800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1078931/1666666 [1:46:26<47:24, 206.64it/s]

finished frames 6473400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1079036/1666666 [1:46:27<48:29, 201.97it/s]

finished frames 6474000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1079141/1666666 [1:46:27<47:39, 205.47it/s]

finished frames 6474600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1079225/1666666 [1:46:28<47:43, 205.17it/s]

finished frames 6475200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1079330/1666666 [1:46:28<47:38, 205.49it/s]

finished frames 6475800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1079435/1666666 [1:46:29<47:30, 206.04it/s]

finished frames 6476400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1079540/1666666 [1:46:29<47:22, 206.53it/s]

finished frames 6477000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1079626/1666666 [1:46:30<47:09, 207.45it/s]

finished frames 6477600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1079732/1666666 [1:46:30<46:42, 209.46it/s]

finished frames 6478200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1079839/1666666 [1:46:31<46:32, 210.12it/s]

finished frames 6478800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1079926/1666666 [1:46:31<46:27, 210.47it/s]

finished frames 6479400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1080034/1666666 [1:46:32<47:37, 205.28it/s]

finished frames 6480000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1080142/1666666 [1:46:32<46:39, 209.49it/s]

finished frames 6480600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1080228/1666666 [1:46:32<46:32, 210.03it/s]

finished frames 6481200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1080338/1666666 [1:46:33<46:23, 210.61it/s]

finished frames 6481800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1080426/1666666 [1:46:33<49:09, 198.76it/s]

finished frames 6482400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1080533/1666666 [1:46:34<50:17, 194.25it/s]

finished frames 6483000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1080638/1666666 [1:46:34<47:32, 205.45it/s]

finished frames 6483600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1080743/1666666 [1:46:35<46:58, 207.85it/s]

finished frames 6484200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1080827/1666666 [1:46:35<46:54, 208.12it/s]

finished frames 6484800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1080937/1666666 [1:46:36<45:57, 212.43it/s]

finished frames 6485400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1081024/1666666 [1:46:36<47:09, 206.99it/s]

finished frames 6486000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1081133/1666666 [1:46:37<46:14, 211.07it/s]

finished frames 6486600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1081243/1666666 [1:46:37<45:56, 212.37it/s]

finished frames 6487200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1081331/1666666 [1:46:38<46:09, 211.39it/s]

finished frames 6487800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1081441/1666666 [1:46:38<46:03, 211.79it/s]

finished frames 6488400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1081529/1666666 [1:46:39<46:01, 211.85it/s]

finished frames 6489000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1081639/1666666 [1:46:39<45:56, 212.23it/s]

finished frames 6489600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1081727/1666666 [1:46:40<45:51, 212.62it/s]

finished frames 6490200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1081837/1666666 [1:46:40<45:57, 212.10it/s]

finished frames 6490800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1081925/1666666 [1:46:41<45:52, 212.44it/s]

finished frames 6491400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1082035/1666666 [1:46:41<46:44, 208.46it/s]

finished frames 6492000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1082144/1666666 [1:46:42<45:54, 212.24it/s]

finished frames 6492600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1082232/1666666 [1:46:42<45:57, 211.95it/s]

finished frames 6493200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1082342/1666666 [1:46:43<45:48, 212.61it/s]

finished frames 6493800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1082430/1666666 [1:46:43<45:47, 212.62it/s]

finished frames 6494400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1082540/1666666 [1:46:43<45:35, 213.55it/s]

finished frames 6495000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1082628/1666666 [1:46:44<45:33, 213.65it/s]

finished frames 6495600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1082738/1666666 [1:46:44<45:47, 212.50it/s]

finished frames 6496200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1082826/1666666 [1:46:45<45:54, 211.97it/s]

finished frames 6496800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1082936/1666666 [1:46:45<46:58, 207.14it/s]

finished frames 6497400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1083022/1666666 [1:46:46<47:29, 204.83it/s]

finished frames 6498000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1083130/1666666 [1:46:46<46:29, 209.16it/s]

finished frames 6498600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1083237/1666666 [1:46:47<46:21, 209.73it/s]

finished frames 6499200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▍   | 1083323/1666666 [1:46:47<46:19, 209.84it/s]

finished frames 6499800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1083430/1666666 [1:46:48<46:39, 208.37it/s]

finished frames 6500400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1083537/1666666 [1:46:48<46:13, 210.25it/s]

finished frames 6501000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1083625/1666666 [1:46:49<45:56, 211.49it/s]

finished frames 6501600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1083735/1666666 [1:46:49<45:47, 212.17it/s]

finished frames 6502200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1083823/1666666 [1:46:50<45:55, 211.51it/s]

finished frames 6502800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1083933/1666666 [1:46:50<45:43, 212.41it/s]

finished frames 6503400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1084021/1666666 [1:46:51<47:08, 205.99it/s]

finished frames 6504000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1084127/1666666 [1:46:51<46:36, 208.30it/s]

finished frames 6504600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1084233/1666666 [1:46:52<46:24, 209.17it/s]

finished frames 6505200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1084341/1666666 [1:46:52<46:09, 210.25it/s]

finished frames 6505800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1084428/1666666 [1:46:52<46:07, 210.35it/s]

finished frames 6506400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1084538/1666666 [1:46:53<46:08, 210.25it/s]

finished frames 6507000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1084625/1666666 [1:46:53<46:09, 210.18it/s]

finished frames 6507600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1084734/1666666 [1:46:54<46:10, 210.08it/s]

finished frames 6508200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1084843/1666666 [1:46:54<46:04, 210.49it/s]

finished frames 6508800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1084930/1666666 [1:46:55<46:13, 209.77it/s]

finished frames 6509400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1085037/1666666 [1:46:55<47:03, 206.00it/s]

finished frames 6510000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1085122/1666666 [1:46:56<46:34, 208.12it/s]

finished frames 6510600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1085227/1666666 [1:46:56<50:47, 190.82it/s]

finished frames 6511200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1085332/1666666 [1:46:57<47:40, 203.25it/s]

finished frames 6511800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1085441/1666666 [1:46:57<46:00, 210.57it/s]

finished frames 6512400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1085529/1666666 [1:46:58<46:07, 209.95it/s]

finished frames 6513000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1085639/1666666 [1:46:58<45:53, 211.03it/s]

finished frames 6513600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1085727/1666666 [1:46:59<45:58, 210.63it/s]

finished frames 6514200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1085837/1666666 [1:46:59<46:03, 210.18it/s]

finished frames 6514800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1085923/1666666 [1:47:00<46:06, 209.89it/s]

finished frames 6515400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1086031/1666666 [1:47:00<47:04, 205.56it/s]

finished frames 6516000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1086138/1666666 [1:47:01<46:13, 209.28it/s]

finished frames 6516600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1086223/1666666 [1:47:01<46:16, 209.04it/s]

finished frames 6517200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1086330/1666666 [1:47:02<46:12, 209.30it/s]

finished frames 6517800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1086437/1666666 [1:47:02<46:10, 209.44it/s]

finished frames 6518400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1086522/1666666 [1:47:03<46:18, 208.76it/s]

finished frames 6519000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1086631/1666666 [1:47:03<46:00, 210.14it/s]

finished frames 6519600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1086738/1666666 [1:47:04<46:05, 209.69it/s]

finished frames 6520200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1086823/1666666 [1:47:04<46:12, 209.15it/s]

finished frames 6520800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1086930/1666666 [1:47:04<46:10, 209.26it/s]

finished frames 6521400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1087036/1666666 [1:47:05<47:02, 205.38it/s]

finished frames 6522000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1087122/1666666 [1:47:05<46:05, 209.59it/s]

finished frames 6522600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1087232/1666666 [1:47:06<45:46, 210.93it/s]

finished frames 6523200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1087339/1666666 [1:47:06<46:16, 208.66it/s]

finished frames 6523800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1087423/1666666 [1:47:07<46:35, 207.21it/s]

finished frames 6524400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1087529/1666666 [1:47:07<46:12, 208.91it/s]

finished frames 6525000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1087635/1666666 [1:47:08<48:05, 200.70it/s]

finished frames 6525600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1087743/1666666 [1:47:08<46:13, 208.71it/s]

finished frames 6526200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1087828/1666666 [1:47:09<46:11, 208.85it/s]

finished frames 6526800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1087934/1666666 [1:47:09<46:08, 209.05it/s]

finished frames 6527400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1088040/1666666 [1:47:10<47:14, 204.12it/s]

finished frames 6528000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1088124/1666666 [1:47:10<46:24, 207.75it/s]

finished frames 6528600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1088229/1666666 [1:47:11<46:44, 206.23it/s]

finished frames 6529200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1088334/1666666 [1:47:11<46:24, 207.69it/s]

finished frames 6529800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1088440/1666666 [1:47:12<46:16, 208.28it/s]

finished frames 6530400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1088524/1666666 [1:47:12<46:24, 207.63it/s]

finished frames 6531000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1088630/1666666 [1:47:13<46:18, 208.05it/s]

finished frames 6531600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1088735/1666666 [1:47:13<46:17, 208.09it/s]

finished frames 6532200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1088840/1666666 [1:47:14<46:24, 207.53it/s]

finished frames 6532800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1088924/1666666 [1:47:14<46:31, 206.97it/s]

finished frames 6533400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1089029/1666666 [1:47:15<46:58, 204.92it/s]

finished frames 6534000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1089138/1666666 [1:47:15<45:38, 210.85it/s]

finished frames 6534600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1089226/1666666 [1:47:16<45:20, 212.27it/s]

finished frames 6535200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1089336/1666666 [1:47:16<45:12, 212.88it/s]

finished frames 6535800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1089424/1666666 [1:47:16<45:06, 213.29it/s]

finished frames 6536400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1089534/1666666 [1:47:17<45:01, 213.60it/s]

finished frames 6537000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1089644/1666666 [1:47:18<45:00, 213.63it/s]

finished frames 6537600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1089732/1666666 [1:47:18<45:16, 212.41it/s]

finished frames 6538200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1089842/1666666 [1:47:18<45:05, 213.18it/s]

finished frames 6538800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1089930/1666666 [1:47:19<49:07, 195.64it/s]

finished frames 6539400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1090037/1666666 [1:47:19<46:57, 204.68it/s]

finished frames 6540000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1090125/1666666 [1:47:20<45:37, 210.64it/s]

finished frames 6540600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1090235/1666666 [1:47:20<45:16, 212.18it/s]

finished frames 6541200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1090323/1666666 [1:47:21<45:17, 212.08it/s]

finished frames 6541800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1090433/1666666 [1:47:21<45:11, 212.54it/s]

finished frames 6542400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1090543/1666666 [1:47:22<45:11, 212.47it/s]

finished frames 6543000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1090631/1666666 [1:47:22<45:12, 212.33it/s]

finished frames 6543600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1090741/1666666 [1:47:23<45:06, 212.82it/s]

finished frames 6544200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1090829/1666666 [1:47:23<45:09, 212.50it/s]

finished frames 6544800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1090939/1666666 [1:47:24<45:05, 212.79it/s]

finished frames 6545400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1091027/1666666 [1:47:24<46:07, 208.00it/s]

finished frames 6546000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1091136/1666666 [1:47:25<45:14, 212.04it/s]

finished frames 6546600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1091224/1666666 [1:47:25<45:04, 212.75it/s]

finished frames 6547200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1091334/1666666 [1:47:26<45:21, 211.39it/s]

finished frames 6547800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1091422/1666666 [1:47:26<45:11, 212.18it/s]

finished frames 6548400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1091532/1666666 [1:47:26<45:04, 212.64it/s]

finished frames 6549000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 65%|██████▌   | 1091642/1666666 [1:47:27<45:02, 212.81it/s]

finished frames 6549600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1091730/1666666 [1:47:27<44:58, 213.06it/s]

finished frames 6550200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1091840/1666666 [1:47:28<45:03, 212.61it/s]

finished frames 6550800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1091928/1666666 [1:47:28<45:02, 212.67it/s]

finished frames 6551400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1092038/1666666 [1:47:29<46:10, 207.41it/s]

finished frames 6552000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1092126/1666666 [1:47:29<45:25, 210.78it/s]

finished frames 6552600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1092236/1666666 [1:47:30<47:02, 203.54it/s]

finished frames 6553200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1092322/1666666 [1:47:30<48:20, 198.04it/s]

finished frames 6553800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1092432/1666666 [1:47:31<45:19, 211.17it/s]

finished frames 6554400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1092542/1666666 [1:47:31<44:57, 212.80it/s]

finished frames 6555000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1092630/1666666 [1:47:32<44:59, 212.64it/s]

finished frames 6555600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1092740/1666666 [1:47:32<45:02, 212.37it/s]

finished frames 6556200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1092828/1666666 [1:47:33<45:03, 212.22it/s]

finished frames 6556800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1092938/1666666 [1:47:33<44:56, 212.75it/s]

finished frames 6557400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1093026/1666666 [1:47:34<45:55, 208.16it/s]

finished frames 6558000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1093136/1666666 [1:47:34<44:52, 213.00it/s]

finished frames 6558600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1093224/1666666 [1:47:35<44:36, 214.28it/s]

finished frames 6559200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1093334/1666666 [1:47:35<44:35, 214.29it/s]

finished frames 6559800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1093444/1666666 [1:47:36<44:30, 214.66it/s]

finished frames 6560400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1093532/1666666 [1:47:36<44:34, 214.26it/s]

finished frames 6561000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1093642/1666666 [1:47:36<44:23, 215.18it/s]

finished frames 6561600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1093730/1666666 [1:47:37<44:22, 215.18it/s]

finished frames 6562200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1093840/1666666 [1:47:37<44:18, 215.47it/s]

finished frames 6562800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1093928/1666666 [1:47:38<44:12, 215.93it/s]

finished frames 6563400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1094038/1666666 [1:47:38<45:21, 210.42it/s]

finished frames 6564000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1094126/1666666 [1:47:39<44:48, 212.93it/s]

finished frames 6564600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1094236/1666666 [1:47:39<44:29, 214.44it/s]

finished frames 6565200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1094324/1666666 [1:47:40<44:20, 215.13it/s]

finished frames 6565800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1094434/1666666 [1:47:40<44:23, 214.81it/s]

finished frames 6566400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1094544/1666666 [1:47:41<44:22, 214.87it/s]

finished frames 6567000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1094632/1666666 [1:47:41<46:13, 206.26it/s]

finished frames 6567600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1094742/1666666 [1:47:42<46:32, 204.78it/s]

finished frames 6568200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1094830/1666666 [1:47:42<44:56, 212.09it/s]

finished frames 6568800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1094940/1666666 [1:47:43<44:13, 215.47it/s]

finished frames 6569400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1095027/1666666 [1:47:43<45:59, 207.18it/s]

finished frames 6570000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1095132/1666666 [1:47:44<45:46, 208.13it/s]

finished frames 6570600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1095237/1666666 [1:47:44<45:44, 208.19it/s]

finished frames 6571200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1095342/1666666 [1:47:45<45:45, 208.07it/s]

finished frames 6571800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1095426/1666666 [1:47:45<45:47, 207.89it/s]

finished frames 6572400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1095531/1666666 [1:47:45<45:52, 207.48it/s]

finished frames 6573000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1095636/1666666 [1:47:46<45:46, 207.88it/s]

finished frames 6573600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1095741/1666666 [1:47:46<45:50, 207.57it/s]

finished frames 6574200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1095825/1666666 [1:47:47<45:56, 207.07it/s]

finished frames 6574800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1095930/1666666 [1:47:47<45:48, 207.67it/s]

finished frames 6575400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1096036/1666666 [1:47:48<46:30, 204.46it/s]

finished frames 6576000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1096141/1666666 [1:47:48<45:53, 207.23it/s]

finished frames 6576600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1096225/1666666 [1:47:49<45:47, 207.62it/s]

finished frames 6577200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1096330/1666666 [1:47:49<45:45, 207.76it/s]

finished frames 6577800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1096435/1666666 [1:47:50<45:50, 207.33it/s]

finished frames 6578400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1096540/1666666 [1:47:50<45:44, 207.71it/s]

finished frames 6579000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1096624/1666666 [1:47:51<45:51, 207.14it/s]

finished frames 6579600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1096729/1666666 [1:47:51<45:51, 207.12it/s]

finished frames 6580200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1096834/1666666 [1:47:52<45:52, 207.04it/s]

finished frames 6580800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1096918/1666666 [1:47:52<50:00, 189.88it/s]

finished frames 6581400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1097023/1666666 [1:47:53<47:33, 199.64it/s]

finished frames 6582000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1097128/1666666 [1:47:53<47:15, 200.85it/s]

finished frames 6582600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1097237/1666666 [1:47:54<45:03, 210.65it/s]

finished frames 6583200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1097324/1666666 [1:47:54<45:16, 209.61it/s]

finished frames 6583800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1097433/1666666 [1:47:55<45:37, 207.92it/s]

finished frames 6584400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1097538/1666666 [1:47:55<45:58, 206.31it/s]

finished frames 6585000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1097626/1666666 [1:47:56<44:47, 211.71it/s]

finished frames 6585600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1097736/1666666 [1:47:56<44:23, 213.58it/s]

finished frames 6586200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1097824/1666666 [1:47:57<44:34, 212.70it/s]

finished frames 6586800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1097934/1666666 [1:47:57<44:51, 211.28it/s]

finished frames 6587400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1098022/1666666 [1:47:57<45:59, 206.07it/s]

finished frames 6588000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1098128/1666666 [1:47:58<45:28, 208.40it/s]

finished frames 6588600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1098236/1666666 [1:47:58<44:55, 210.86it/s]

finished frames 6589200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1098324/1666666 [1:47:59<44:26, 213.16it/s]

finished frames 6589800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1098434/1666666 [1:47:59<44:33, 212.50it/s]

finished frames 6590400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1098522/1666666 [1:48:00<45:19, 208.90it/s]

finished frames 6591000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1098630/1666666 [1:48:00<45:09, 209.64it/s]

finished frames 6591600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1098736/1666666 [1:48:01<45:33, 207.78it/s]

finished frames 6592200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1098843/1666666 [1:48:01<45:24, 208.41it/s]

finished frames 6592800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1098929/1666666 [1:48:02<45:12, 209.31it/s]

finished frames 6593400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1099039/1666666 [1:48:02<46:50, 202.00it/s]

finished frames 6594000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1099126/1666666 [1:48:03<45:09, 209.49it/s]

finished frames 6594600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1099236/1666666 [1:48:03<44:33, 212.26it/s]

finished frames 6595200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1099324/1666666 [1:48:04<46:19, 204.13it/s]

finished frames 6595800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1099432/1666666 [1:48:04<47:27, 199.24it/s]

finished frames 6596400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1099542/1666666 [1:48:05<44:18, 213.36it/s]

finished frames 6597000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1099630/1666666 [1:48:05<44:11, 213.83it/s]

finished frames 6597600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1099740/1666666 [1:48:06<44:05, 214.33it/s]

finished frames 6598200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1099828/1666666 [1:48:06<44:08, 214.00it/s]

finished frames 6598800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1099938/1666666 [1:48:07<44:04, 214.28it/s]

finished frames 6599400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1100026/1666666 [1:48:07<44:52, 210.48it/s]

finished frames 6600000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1100136/1666666 [1:48:08<44:15, 213.37it/s]

finished frames 6600600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1100224/1666666 [1:48:08<44:10, 213.70it/s]

finished frames 6601200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1100334/1666666 [1:48:08<44:00, 214.49it/s]

finished frames 6601800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1100444/1666666 [1:48:09<43:47, 215.48it/s]

finished frames 6602400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1100532/1666666 [1:48:09<43:55, 214.84it/s]

finished frames 6603000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1100642/1666666 [1:48:10<43:52, 215.04it/s]

finished frames 6603600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1100730/1666666 [1:48:10<43:54, 214.79it/s]

finished frames 6604200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1100840/1666666 [1:48:11<43:51, 215.01it/s]

finished frames 6604800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1100928/1666666 [1:48:11<43:55, 214.64it/s]

finished frames 6605400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1101038/1666666 [1:48:12<44:49, 210.30it/s]

finished frames 6606000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1101126/1666666 [1:48:12<44:16, 212.85it/s]

finished frames 6606600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1101236/1666666 [1:48:13<44:10, 213.32it/s]

finished frames 6607200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1101324/1666666 [1:48:13<44:12, 213.17it/s]

finished frames 6607800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1101434/1666666 [1:48:14<44:06, 213.60it/s]

finished frames 6608400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1101544/1666666 [1:48:14<43:42, 215.50it/s]

finished frames 6609000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1101632/1666666 [1:48:15<43:50, 214.76it/s]

finished frames 6609600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1101742/1666666 [1:48:15<45:07, 208.67it/s]

finished frames 6610200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1101830/1666666 [1:48:16<46:06, 204.20it/s]

finished frames 6610800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1101940/1666666 [1:48:16<44:15, 212.64it/s]

finished frames 6611400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1102028/1666666 [1:48:16<44:50, 209.86it/s]

finished frames 6612000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1102138/1666666 [1:48:17<44:06, 213.29it/s]

finished frames 6612600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1102226/1666666 [1:48:17<43:57, 214.01it/s]

finished frames 6613200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1102336/1666666 [1:48:18<43:52, 214.39it/s]

finished frames 6613800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1102424/1666666 [1:48:18<43:54, 214.17it/s]

finished frames 6614400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1102534/1666666 [1:48:19<43:45, 214.86it/s]

finished frames 6615000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1102644/1666666 [1:48:19<43:49, 214.46it/s]

finished frames 6615600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1102732/1666666 [1:48:20<43:51, 214.28it/s]

finished frames 6616200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1102842/1666666 [1:48:20<43:43, 214.94it/s]

finished frames 6616800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1102930/1666666 [1:48:21<43:48, 214.46it/s]

finished frames 6617400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1103040/1666666 [1:48:21<44:42, 210.09it/s]

finished frames 6618000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1103128/1666666 [1:48:22<44:01, 213.34it/s]

finished frames 6618600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1103238/1666666 [1:48:22<43:46, 214.55it/s]

finished frames 6619200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1103326/1666666 [1:48:23<43:47, 214.38it/s]

finished frames 6619800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1103436/1666666 [1:48:23<43:40, 214.97it/s]

finished frames 6620400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1103524/1666666 [1:48:23<43:48, 214.21it/s]

finished frames 6621000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1103634/1666666 [1:48:24<43:38, 215.02it/s]

finished frames 6621600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1103744/1666666 [1:48:24<43:37, 215.07it/s]

finished frames 6622200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1103832/1666666 [1:48:25<43:33, 215.35it/s]

finished frames 6622800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1103942/1666666 [1:48:25<43:33, 215.32it/s]

finished frames 6623400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1104030/1666666 [1:48:26<44:41, 209.79it/s]

finished frames 6624000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▌   | 1104140/1666666 [1:48:26<45:06, 207.82it/s]

finished frames 6624600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▋   | 1104228/1666666 [1:48:27<46:45, 200.48it/s]

finished frames 6625200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▋   | 1104338/1666666 [1:48:27<44:11, 212.07it/s]

finished frames 6625800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▋   | 1104426/1666666 [1:48:28<43:52, 213.57it/s]

finished frames 6626400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▋   | 1104536/1666666 [1:48:28<43:42, 214.38it/s]

finished frames 6627000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▋   | 1104624/1666666 [1:48:29<43:39, 214.59it/s]

finished frames 6627600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▋   | 1104734/1666666 [1:48:29<43:34, 214.92it/s]

finished frames 6628200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▋   | 1104844/1666666 [1:48:30<43:33, 214.94it/s]

finished frames 6628800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▋   | 1104932/1666666 [1:48:30<43:37, 214.64it/s]

finished frames 6629400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▋   | 1105042/1666666 [1:48:31<44:33, 210.10it/s]

finished frames 6630000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▋   | 1105130/1666666 [1:48:31<44:02, 212.52it/s]

finished frames 6630600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▋   | 1105240/1666666 [1:48:32<43:29, 215.11it/s]

finished frames 6631200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▋   | 1105328/1666666 [1:48:32<43:41, 214.14it/s]

finished frames 6631800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▋   | 1105438/1666666 [1:48:32<43:26, 215.32it/s]

finished frames 6632400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▋   | 1105526/1666666 [1:48:33<43:34, 214.64it/s]

finished frames 6633000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▋   | 1105636/1666666 [1:48:33<43:28, 215.08it/s]

finished frames 6633600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▋   | 1105724/1666666 [1:48:34<43:28, 215.05it/s]

finished frames 6634200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▋   | 1105834/1666666 [1:48:34<43:05, 216.88it/s]

finished frames 6634800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▋   | 1105944/1666666 [1:48:35<43:21, 215.55it/s]

finished frames 6635400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▋   | 1106032/1666666 [1:48:35<44:15, 211.09it/s]

finished frames 6636000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▋   | 1106142/1666666 [1:48:36<43:26, 215.07it/s]

finished frames 6636600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▋   | 1106230/1666666 [1:48:36<43:24, 215.17it/s]

finished frames 6637200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▋   | 1106340/1666666 [1:48:37<43:26, 214.99it/s]

finished frames 6637800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▋   | 1106428/1666666 [1:48:37<43:27, 214.83it/s]

finished frames 6638400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▋   | 1106538/1666666 [1:48:38<44:45, 208.55it/s]

finished frames 6639000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▋   | 1106626/1666666 [1:48:38<45:34, 204.82it/s]

finished frames 6639600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▋   | 1106736/1666666 [1:48:39<43:44, 213.35it/s]

finished frames 6640200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▋   | 1106824/1666666 [1:48:39<44:08, 211.36it/s]

finished frames 6640800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▋   | 1106934/1666666 [1:48:40<44:15, 210.80it/s]

finished frames 6641400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▋   | 1107022/1666666 [1:48:40<45:29, 205.05it/s]

finished frames 6642000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▋   | 1107131/1666666 [1:48:40<44:11, 211.07it/s]

finished frames 6642600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▋   | 1107241/1666666 [1:48:41<44:03, 211.60it/s]

finished frames 6643200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▋   | 1107329/1666666 [1:48:41<44:01, 211.77it/s]

finished frames 6643800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▋   | 1107439/1666666 [1:48:42<44:02, 211.66it/s]

finished frames 6644400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▋   | 1107527/1666666 [1:48:42<44:01, 211.71it/s]

finished frames 6645000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▋   | 1107637/1666666 [1:48:43<44:00, 211.73it/s]

finished frames 6645600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▋   | 1107725/1666666 [1:48:43<44:02, 211.52it/s]

finished frames 6646200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▋   | 1107834/1666666 [1:48:44<44:24, 209.70it/s]

finished frames 6646800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▋   | 1107939/1666666 [1:48:44<44:41, 208.36it/s]

finished frames 6647400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▋   | 1108023/1666666 [1:48:45<45:59, 202.43it/s]

finished frames 6648000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▋   | 1108128/1666666 [1:48:45<45:10, 206.04it/s]

finished frames 6648600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 66%|██████▋   | 1108233/1666666 [1:48:46<44:53, 207.30it/s]

finished frames 6649200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1108340/1666666 [1:48:46<44:21, 209.81it/s]

finished frames 6649800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1108428/1666666 [1:48:47<44:07, 210.84it/s]

finished frames 6650400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1108538/1666666 [1:48:47<43:58, 211.55it/s]

finished frames 6651000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1108626/1666666 [1:48:48<43:46, 212.47it/s]

finished frames 6651600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1108736/1666666 [1:48:48<43:35, 213.30it/s]

finished frames 6652200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1108824/1666666 [1:48:49<45:47, 203.05it/s]

finished frames 6652800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1108933/1666666 [1:48:49<44:02, 211.06it/s]

finished frames 6653400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1109020/1666666 [1:48:50<46:15, 200.92it/s]

finished frames 6654000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1109130/1666666 [1:48:50<44:00, 211.18it/s]

finished frames 6654600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1109240/1666666 [1:48:51<43:42, 212.56it/s]

finished frames 6655200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1109328/1666666 [1:48:51<43:37, 212.94it/s]

finished frames 6655800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1109438/1666666 [1:48:51<43:29, 213.52it/s]

finished frames 6656400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1109526/1666666 [1:48:52<43:32, 213.30it/s]

finished frames 6657000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1109636/1666666 [1:48:52<43:36, 212.90it/s]

finished frames 6657600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1109724/1666666 [1:48:53<43:30, 213.37it/s]

finished frames 6658200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1109834/1666666 [1:48:53<43:32, 213.16it/s]

finished frames 6658800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1109942/1666666 [1:48:54<44:25, 208.87it/s]

finished frames 6659400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1110026/1666666 [1:48:54<45:44, 202.83it/s]

finished frames 6660000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1110131/1666666 [1:48:55<44:55, 206.45it/s]

finished frames 6660600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1110236/1666666 [1:48:55<44:40, 207.60it/s]

finished frames 6661200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1110341/1666666 [1:48:56<44:39, 207.64it/s]

finished frames 6661800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1110425/1666666 [1:48:56<44:40, 207.49it/s]

finished frames 6662400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1110534/1666666 [1:48:57<44:00, 210.60it/s]

finished frames 6663000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1110640/1666666 [1:48:57<44:43, 207.21it/s]

finished frames 6663600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1110724/1666666 [1:48:58<44:47, 206.83it/s]

finished frames 6664200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1110829/1666666 [1:48:58<44:48, 206.78it/s]

finished frames 6664800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1110934/1666666 [1:48:59<44:52, 206.41it/s]

finished frames 6665400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1111018/1666666 [1:48:59<47:07, 196.54it/s]

finished frames 6666000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1111122/1666666 [1:49:00<45:27, 203.65it/s]

finished frames 6666600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1111227/1666666 [1:49:00<44:30, 207.96it/s]

finished frames 6667200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1111332/1666666 [1:49:01<47:10, 196.22it/s]

finished frames 6667800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1111437/1666666 [1:49:01<44:52, 206.21it/s]

finished frames 6668400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1111542/1666666 [1:49:02<44:29, 207.94it/s]

finished frames 6669000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1111628/1666666 [1:49:02<44:16, 208.96it/s]

finished frames 6669600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1111733/1666666 [1:49:03<44:28, 207.97it/s]

finished frames 6670200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1111838/1666666 [1:49:03<44:21, 208.47it/s]

finished frames 6670800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1111922/1666666 [1:49:03<44:25, 208.16it/s]

finished frames 6671400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1112027/1666666 [1:49:04<45:26, 203.42it/s]

finished frames 6672000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1112132/1666666 [1:49:04<44:54, 205.81it/s]

finished frames 6672600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1112237/1666666 [1:49:05<44:45, 206.43it/s]

finished frames 6673200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1112342/1666666 [1:49:06<44:35, 207.20it/s]

finished frames 6673800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1112426/1666666 [1:49:06<44:40, 206.79it/s]

finished frames 6674400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1112531/1666666 [1:49:06<44:40, 206.74it/s]

finished frames 6675000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1112636/1666666 [1:49:07<44:38, 206.86it/s]

finished frames 6675600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1112741/1666666 [1:49:07<44:35, 207.07it/s]

finished frames 6676200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1112825/1666666 [1:49:08<44:42, 206.46it/s]

finished frames 6676800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1112930/1666666 [1:49:08<44:43, 206.32it/s]

finished frames 6677400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1113035/1666666 [1:49:09<45:28, 202.87it/s]

finished frames 6678000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1113140/1666666 [1:49:09<44:50, 205.71it/s]

finished frames 6678600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1113224/1666666 [1:49:10<44:45, 206.07it/s]

finished frames 6679200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1113329/1666666 [1:49:10<44:38, 206.59it/s]

finished frames 6679800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1113434/1666666 [1:49:11<44:29, 207.24it/s]

finished frames 6680400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1113540/1666666 [1:49:11<46:04, 200.09it/s]

finished frames 6681000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1113624/1666666 [1:49:12<48:48, 188.84it/s]

finished frames 6681600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1113729/1666666 [1:49:12<45:02, 204.60it/s]

finished frames 6682200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1113836/1666666 [1:49:13<43:58, 209.49it/s]

finished frames 6682800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1113944/1666666 [1:49:13<43:17, 212.76it/s]

finished frames 6683400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1114032/1666666 [1:49:14<44:11, 208.39it/s]

finished frames 6684000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1114142/1666666 [1:49:14<43:22, 212.28it/s]

finished frames 6684600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1114230/1666666 [1:49:15<43:21, 212.37it/s]

finished frames 6685200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1114340/1666666 [1:49:15<43:16, 212.72it/s]

finished frames 6685800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1114428/1666666 [1:49:16<43:29, 211.61it/s]

finished frames 6686400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1114538/1666666 [1:49:16<43:21, 212.23it/s]

finished frames 6687000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1114626/1666666 [1:49:17<43:18, 212.48it/s]

finished frames 6687600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1114736/1666666 [1:49:17<43:18, 212.43it/s]

finished frames 6688200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1114824/1666666 [1:49:17<43:20, 212.24it/s]

finished frames 6688800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1114934/1666666 [1:49:18<43:20, 212.20it/s]

finished frames 6689400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1115022/1666666 [1:49:18<44:25, 206.92it/s]

finished frames 6690000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1115132/1666666 [1:49:19<43:07, 213.19it/s]

finished frames 6690600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1115242/1666666 [1:49:19<42:50, 214.55it/s]

finished frames 6691200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1115330/1666666 [1:49:20<42:44, 214.95it/s]

finished frames 6691800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1115440/1666666 [1:49:20<42:38, 215.47it/s]

finished frames 6692400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1115528/1666666 [1:49:21<42:47, 214.68it/s]

finished frames 6693000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1115638/1666666 [1:49:21<42:50, 214.41it/s]

finished frames 6693600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1115726/1666666 [1:49:22<42:48, 214.48it/s]

finished frames 6694200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1115836/1666666 [1:49:22<42:50, 214.25it/s]

finished frames 6694800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1115924/1666666 [1:49:23<44:29, 206.33it/s]

finished frames 6695400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1116034/1666666 [1:49:23<46:30, 197.30it/s]

finished frames 6696000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1116122/1666666 [1:49:24<43:56, 208.84it/s]

finished frames 6696600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1116232/1666666 [1:49:24<43:11, 212.40it/s]

finished frames 6697200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1116342/1666666 [1:49:25<43:32, 210.63it/s]

finished frames 6697800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1116430/1666666 [1:49:25<43:30, 210.75it/s]

finished frames 6698400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1116540/1666666 [1:49:26<43:29, 210.85it/s]

finished frames 6699000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1116628/1666666 [1:49:26<42:52, 213.85it/s]

finished frames 6699600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1116738/1666666 [1:49:27<42:41, 214.69it/s]

finished frames 6700200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1116826/1666666 [1:49:27<42:49, 214.02it/s]

finished frames 6700800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1116936/1666666 [1:49:27<42:45, 214.32it/s]

finished frames 6701400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1117024/1666666 [1:49:28<43:48, 209.13it/s]

finished frames 6702000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1117134/1666666 [1:49:28<42:52, 213.59it/s]

finished frames 6702600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1117244/1666666 [1:49:29<42:37, 214.85it/s]

finished frames 6703200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1117332/1666666 [1:49:29<42:31, 215.29it/s]

finished frames 6703800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1117442/1666666 [1:49:30<42:46, 214.02it/s]

finished frames 6704400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1117530/1666666 [1:49:30<42:57, 213.08it/s]

finished frames 6705000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1117640/1666666 [1:49:31<42:51, 213.54it/s]

finished frames 6705600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1117728/1666666 [1:49:31<42:47, 213.76it/s]

finished frames 6706200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1117838/1666666 [1:49:32<43:00, 212.65it/s]

finished frames 6706800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1117926/1666666 [1:49:32<43:06, 212.18it/s]

finished frames 6707400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1118036/1666666 [1:49:33<43:58, 207.90it/s]

finished frames 6708000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1118122/1666666 [1:49:33<43:28, 210.30it/s]

finished frames 6708600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1118232/1666666 [1:49:34<43:09, 211.75it/s]

finished frames 6709200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1118342/1666666 [1:49:34<44:14, 206.53it/s]

finished frames 6709800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1118428/1666666 [1:49:35<43:30, 210.00it/s]

finished frames 6710400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1118538/1666666 [1:49:35<43:01, 212.32it/s]

finished frames 6711000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1118626/1666666 [1:49:35<43:00, 212.35it/s]

finished frames 6711600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1118736/1666666 [1:49:36<43:05, 211.95it/s]

finished frames 6712200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1118824/1666666 [1:49:36<42:56, 212.60it/s]

finished frames 6712800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1118934/1666666 [1:49:37<43:05, 211.84it/s]

finished frames 6713400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1119022/1666666 [1:49:37<44:20, 205.87it/s]

finished frames 6714000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1119131/1666666 [1:49:38<43:14, 211.03it/s]

finished frames 6714600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1119241/1666666 [1:49:38<43:03, 211.90it/s]

finished frames 6715200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1119329/1666666 [1:49:39<42:56, 212.43it/s]

finished frames 6715800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1119439/1666666 [1:49:39<42:51, 212.77it/s]

finished frames 6716400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1119527/1666666 [1:49:40<42:52, 212.65it/s]

finished frames 6717000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1119637/1666666 [1:49:40<42:54, 212.50it/s]

finished frames 6717600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1119725/1666666 [1:49:41<42:56, 212.24it/s]

finished frames 6718200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1119835/1666666 [1:49:41<42:51, 212.68it/s]

finished frames 6718800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1119923/1666666 [1:49:42<42:54, 212.38it/s]

finished frames 6719400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1120033/1666666 [1:49:42<43:45, 208.24it/s]

finished frames 6720000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1120142/1666666 [1:49:43<43:07, 211.23it/s]

finished frames 6720600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1120230/1666666 [1:49:43<42:56, 212.05it/s]

finished frames 6721200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1120340/1666666 [1:49:44<42:44, 213.07it/s]

finished frames 6721800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1120428/1666666 [1:49:44<42:42, 213.17it/s]

finished frames 6722400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1120538/1666666 [1:49:44<42:34, 213.79it/s]

finished frames 6723000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1120626/1666666 [1:49:45<42:39, 213.36it/s]

finished frames 6723600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1120736/1666666 [1:49:45<43:28, 209.26it/s]

finished frames 6724200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1120824/1666666 [1:49:46<44:43, 203.42it/s]

finished frames 6724800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1120933/1666666 [1:49:46<43:02, 211.34it/s]

finished frames 6725400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1121040/1666666 [1:49:47<44:38, 203.68it/s]

finished frames 6726000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1121127/1666666 [1:49:47<43:16, 210.07it/s]

finished frames 6726600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1121237/1666666 [1:49:48<42:52, 212.04it/s]

finished frames 6727200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1121325/1666666 [1:49:48<42:38, 213.13it/s]

finished frames 6727800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1121435/1666666 [1:49:49<42:48, 212.30it/s]

finished frames 6728400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1121523/1666666 [1:49:49<43:05, 210.86it/s]

finished frames 6729000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1121633/1666666 [1:49:50<43:10, 210.42it/s]

finished frames 6729600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1121742/1666666 [1:49:50<43:13, 210.15it/s]

finished frames 6730200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1121828/1666666 [1:49:51<43:20, 209.53it/s]

finished frames 6730800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1121935/1666666 [1:49:51<43:14, 209.96it/s]

finished frames 6731400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1122041/1666666 [1:49:52<44:17, 204.96it/s]

finished frames 6732000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1122126/1666666 [1:49:52<43:34, 208.26it/s]

finished frames 6732600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1122231/1666666 [1:49:53<43:30, 208.57it/s]

finished frames 6733200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1122337/1666666 [1:49:53<43:46, 207.24it/s]

finished frames 6733800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1122423/1666666 [1:49:54<43:10, 210.10it/s]

finished frames 6734400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1122533/1666666 [1:49:54<42:56, 211.22it/s]

finished frames 6735000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1122643/1666666 [1:49:55<42:48, 211.80it/s]

finished frames 6735600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1122729/1666666 [1:49:55<43:40, 207.61it/s]

finished frames 6736200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1122837/1666666 [1:49:55<42:44, 212.08it/s]

finished frames 6736800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1122925/1666666 [1:49:56<41:46, 216.96it/s]

finished frames 6737400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1123035/1666666 [1:49:56<42:30, 213.19it/s]

finished frames 6738000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1123123/1666666 [1:49:57<42:01, 215.57it/s]

finished frames 6738600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1123233/1666666 [1:49:57<42:22, 213.77it/s]

finished frames 6739200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1123322/1666666 [1:49:58<41:47, 216.67it/s]

finished frames 6739800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1123432/1666666 [1:49:58<42:44, 211.80it/s]

finished frames 6740400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1123542/1666666 [1:49:59<42:04, 215.11it/s]

finished frames 6741000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1123630/1666666 [1:49:59<41:44, 216.79it/s]

finished frames 6741600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1123740/1666666 [1:50:00<41:52, 216.09it/s]

finished frames 6742200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1123828/1666666 [1:50:00<42:35, 212.42it/s]

finished frames 6742800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1123938/1666666 [1:50:01<42:25, 213.18it/s]

finished frames 6743400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1124026/1666666 [1:50:01<43:31, 207.75it/s]

finished frames 6744000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1124134/1666666 [1:50:02<42:55, 210.61it/s]

finished frames 6744600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1124222/1666666 [1:50:02<42:45, 211.41it/s]

finished frames 6745200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1124332/1666666 [1:50:02<42:40, 211.80it/s]

finished frames 6745800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1124442/1666666 [1:50:03<42:34, 212.24it/s]

finished frames 6746400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1124530/1666666 [1:50:03<42:22, 213.21it/s]

finished frames 6747000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1124640/1666666 [1:50:04<42:25, 212.96it/s]

finished frames 6747600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1124728/1666666 [1:50:04<42:19, 213.37it/s]

finished frames 6748200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1124838/1666666 [1:50:05<42:31, 212.33it/s]

finished frames 6748800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 67%|██████▋   | 1124926/1666666 [1:50:05<42:25, 212.79it/s]

finished frames 6749400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1125036/1666666 [1:50:06<43:23, 208.06it/s]

finished frames 6750000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1125123/1666666 [1:50:06<42:43, 211.27it/s]

finished frames 6750600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1125233/1666666 [1:50:07<42:22, 212.98it/s]

finished frames 6751200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1125343/1666666 [1:50:07<42:19, 213.12it/s]

finished frames 6751800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1125431/1666666 [1:50:08<44:55, 200.80it/s]

finished frames 6752400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1125540/1666666 [1:50:08<42:40, 211.35it/s]

finished frames 6753000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1125628/1666666 [1:50:09<42:16, 213.27it/s]

finished frames 6753600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1125738/1666666 [1:50:09<42:20, 212.90it/s]

finished frames 6754200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1125826/1666666 [1:50:10<42:22, 212.69it/s]

finished frames 6754800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1125936/1666666 [1:50:10<42:22, 212.69it/s]

finished frames 6755400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1126024/1666666 [1:50:10<43:13, 208.50it/s]

finished frames 6756000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1126134/1666666 [1:50:11<42:45, 210.67it/s]

finished frames 6756600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1126222/1666666 [1:50:11<42:31, 211.81it/s]

finished frames 6757200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1126332/1666666 [1:50:12<42:32, 211.68it/s]

finished frames 6757800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1126442/1666666 [1:50:12<42:28, 211.95it/s]

finished frames 6758400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1126530/1666666 [1:50:13<42:30, 211.80it/s]

finished frames 6759000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1126640/1666666 [1:50:13<42:27, 211.97it/s]

finished frames 6759600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1126728/1666666 [1:50:14<42:29, 211.77it/s]

finished frames 6760200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1126838/1666666 [1:50:14<42:22, 212.31it/s]

finished frames 6760800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1126926/1666666 [1:50:15<42:25, 212.03it/s]

finished frames 6761400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1127036/1666666 [1:50:15<43:25, 207.13it/s]

finished frames 6762000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1127143/1666666 [1:50:16<42:57, 209.29it/s]

finished frames 6762600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1127228/1666666 [1:50:16<42:58, 209.18it/s]

finished frames 6763200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1127334/1666666 [1:50:17<42:57, 209.23it/s]

finished frames 6763800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1127440/1666666 [1:50:17<42:54, 209.45it/s]

finished frames 6764400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1127525/1666666 [1:50:18<42:56, 209.25it/s]

finished frames 6765000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1127632/1666666 [1:50:18<42:52, 209.55it/s]

finished frames 6765600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1127738/1666666 [1:50:19<42:53, 209.41it/s]

finished frames 6766200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1127823/1666666 [1:50:19<46:38, 192.53it/s]

finished frames 6766800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1127929/1666666 [1:50:20<46:16, 194.04it/s]

finished frames 6767400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1128034/1666666 [1:50:20<44:39, 201.05it/s]

finished frames 6768000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1128141/1666666 [1:50:21<43:08, 208.08it/s]

finished frames 6768600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1128226/1666666 [1:50:21<42:59, 208.76it/s]

finished frames 6769200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1128333/1666666 [1:50:22<42:54, 209.14it/s]

finished frames 6769800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1128439/1666666 [1:50:22<42:51, 209.32it/s]

finished frames 6770400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1128524/1666666 [1:50:23<42:48, 209.52it/s]

finished frames 6771000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1128632/1666666 [1:50:23<42:46, 209.63it/s]

finished frames 6771600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1128739/1666666 [1:50:24<42:43, 209.81it/s]

finished frames 6772200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1128824/1666666 [1:50:24<42:48, 209.42it/s]

finished frames 6772800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1128932/1666666 [1:50:24<42:38, 210.19it/s]

finished frames 6773400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1129038/1666666 [1:50:25<43:39, 205.21it/s]

finished frames 6774000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1129124/1666666 [1:50:25<42:58, 208.49it/s]

finished frames 6774600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1129232/1666666 [1:50:26<42:52, 208.90it/s]

finished frames 6775200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1129339/1666666 [1:50:26<42:44, 209.55it/s]

finished frames 6775800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1129425/1666666 [1:50:27<42:43, 209.61it/s]

finished frames 6776400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1129530/1666666 [1:50:27<43:25, 206.17it/s]

finished frames 6777000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1129635/1666666 [1:50:28<43:31, 205.62it/s]

finished frames 6777600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1129742/1666666 [1:50:28<42:37, 209.91it/s]

finished frames 6778200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1129830/1666666 [1:50:29<42:23, 211.07it/s]

finished frames 6778800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1129940/1666666 [1:50:29<42:11, 212.01it/s]

finished frames 6779400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1130028/1666666 [1:50:30<43:09, 207.27it/s]

finished frames 6780000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1130116/1666666 [1:50:30<45:38, 195.93it/s]

finished frames 6780600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1130226/1666666 [1:50:31<42:26, 210.70it/s]

finished frames 6781200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1130336/1666666 [1:50:31<41:28, 215.56it/s]

finished frames 6781800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1130424/1666666 [1:50:32<41:31, 215.26it/s]

finished frames 6782400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1130534/1666666 [1:50:32<41:13, 216.78it/s]

finished frames 6783000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1130644/1666666 [1:50:33<41:12, 216.81it/s]

finished frames 6783600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1130732/1666666 [1:50:33<41:16, 216.44it/s]

finished frames 6784200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1130842/1666666 [1:50:34<41:20, 215.99it/s]

finished frames 6784800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1130930/1666666 [1:50:34<41:37, 214.54it/s]

finished frames 6785400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1131040/1666666 [1:50:34<42:49, 208.44it/s]

finished frames 6786000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1131126/1666666 [1:50:35<42:25, 210.38it/s]

finished frames 6786600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1131236/1666666 [1:50:35<42:07, 211.88it/s]

finished frames 6787200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1131324/1666666 [1:50:36<41:54, 212.93it/s]

finished frames 6787800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1131434/1666666 [1:50:36<41:37, 214.33it/s]

finished frames 6788400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1131544/1666666 [1:50:37<41:40, 213.97it/s]

finished frames 6789000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1131632/1666666 [1:50:37<41:54, 212.80it/s]

finished frames 6789600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1131742/1666666 [1:50:38<42:06, 211.71it/s]

finished frames 6790200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1131830/1666666 [1:50:38<42:16, 210.84it/s]

finished frames 6790800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1131939/1666666 [1:50:39<42:21, 210.38it/s]

finished frames 6791400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1132025/1666666 [1:50:39<43:44, 203.73it/s]

finished frames 6792000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1132130/1666666 [1:50:40<43:04, 206.86it/s]

finished frames 6792600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1132235/1666666 [1:50:40<42:52, 207.75it/s]

finished frames 6793200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1132340/1666666 [1:50:41<43:56, 202.67it/s]

finished frames 6793800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1132424/1666666 [1:50:41<43:25, 205.07it/s]

finished frames 6794400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1132531/1666666 [1:50:42<44:31, 199.94it/s]

finished frames 6795000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1132636/1666666 [1:50:42<45:35, 195.20it/s]

finished frames 6795600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1132742/1666666 [1:50:43<43:04, 206.62it/s]

finished frames 6796200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1132826/1666666 [1:50:43<42:49, 207.78it/s]

finished frames 6796800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1132931/1666666 [1:50:44<42:42, 208.31it/s]

finished frames 6797400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1133036/1666666 [1:50:44<43:36, 203.96it/s]

finished frames 6798000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1133142/1666666 [1:50:45<42:46, 207.91it/s]

finished frames 6798600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1133226/1666666 [1:50:45<42:47, 207.74it/s]

finished frames 6799200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1133331/1666666 [1:50:46<42:54, 207.19it/s]

finished frames 6799800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1133436/1666666 [1:50:46<42:42, 208.08it/s]

finished frames 6800400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1133541/1666666 [1:50:47<42:50, 207.39it/s]

finished frames 6801000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1133625/1666666 [1:50:47<42:55, 207.00it/s]

finished frames 6801600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1133730/1666666 [1:50:47<42:47, 207.58it/s]

finished frames 6802200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1133835/1666666 [1:50:48<42:45, 207.69it/s]

finished frames 6802800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1133940/1666666 [1:50:48<42:50, 207.24it/s]

finished frames 6803400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1134024/1666666 [1:50:49<43:43, 203.02it/s]

finished frames 6804000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1134129/1666666 [1:50:49<43:01, 206.26it/s]

finished frames 6804600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1134234/1666666 [1:50:50<42:47, 207.35it/s]

finished frames 6805200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1134340/1666666 [1:50:50<42:35, 208.34it/s]

finished frames 6805800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1134425/1666666 [1:50:51<42:34, 208.34it/s]

finished frames 6806400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1134531/1666666 [1:50:51<42:37, 208.03it/s]

finished frames 6807000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1134637/1666666 [1:50:52<42:29, 208.72it/s]

finished frames 6807600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1134722/1666666 [1:50:52<42:26, 208.90it/s]

finished frames 6808200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1134828/1666666 [1:50:53<47:04, 188.30it/s]

finished frames 6808800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1134933/1666666 [1:50:53<43:47, 202.38it/s]

finished frames 6809400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1135040/1666666 [1:50:54<43:00, 206.05it/s]

finished frames 6810000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1135128/1666666 [1:50:54<41:50, 211.69it/s]

finished frames 6810600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1135238/1666666 [1:50:55<41:57, 211.11it/s]

finished frames 6811200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1135323/1666666 [1:50:55<42:24, 208.80it/s]

finished frames 6811800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1135428/1666666 [1:50:56<42:35, 207.89it/s]

finished frames 6812400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1135533/1666666 [1:50:56<42:45, 207.06it/s]

finished frames 6813000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1135638/1666666 [1:50:57<42:33, 208.00it/s]

finished frames 6813600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1135722/1666666 [1:50:57<42:59, 205.87it/s]

finished frames 6814200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1135827/1666666 [1:50:58<42:46, 206.86it/s]

finished frames 6814800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1135932/1666666 [1:50:58<42:52, 206.30it/s]

finished frames 6815400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1136037/1666666 [1:50:59<43:51, 201.68it/s]

finished frames 6816000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1136142/1666666 [1:50:59<43:10, 204.82it/s]

finished frames 6816600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1136226/1666666 [1:51:00<43:06, 205.06it/s]

finished frames 6817200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1136331/1666666 [1:51:00<42:43, 206.89it/s]

finished frames 6817800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1136436/1666666 [1:51:01<42:34, 207.59it/s]

finished frames 6818400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1136541/1666666 [1:51:01<42:34, 207.52it/s]

finished frames 6819000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1136625/1666666 [1:51:01<43:03, 205.15it/s]

finished frames 6819600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1136730/1666666 [1:51:02<43:30, 202.96it/s]

finished frames 6820200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1136835/1666666 [1:51:02<42:41, 206.86it/s]

finished frames 6820800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1136940/1666666 [1:51:03<42:25, 208.11it/s]

finished frames 6821400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1137024/1666666 [1:51:03<43:31, 202.83it/s]

finished frames 6822000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1137129/1666666 [1:51:04<42:32, 207.50it/s]

finished frames 6822600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1137234/1666666 [1:51:04<43:16, 203.88it/s]

finished frames 6823200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1137340/1666666 [1:51:05<43:38, 202.18it/s]

finished frames 6823800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1137424/1666666 [1:51:05<42:43, 206.41it/s]

finished frames 6824400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1137529/1666666 [1:51:06<42:21, 208.22it/s]

finished frames 6825000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1137635/1666666 [1:51:06<42:17, 208.52it/s]

finished frames 6825600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1137740/1666666 [1:51:07<42:15, 208.62it/s]

finished frames 6826200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1137824/1666666 [1:51:07<42:24, 207.82it/s]

finished frames 6826800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1137929/1666666 [1:51:08<42:26, 207.67it/s]

finished frames 6827400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1138034/1666666 [1:51:08<43:27, 202.70it/s]

finished frames 6828000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1138139/1666666 [1:51:09<42:45, 205.99it/s]

finished frames 6828600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1138223/1666666 [1:51:09<42:34, 206.85it/s]

finished frames 6829200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1138328/1666666 [1:51:10<42:18, 208.09it/s]

finished frames 6829800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1138433/1666666 [1:51:10<42:23, 207.66it/s]

finished frames 6830400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1138538/1666666 [1:51:11<42:16, 208.19it/s]

finished frames 6831000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1138622/1666666 [1:51:11<42:23, 207.62it/s]

finished frames 6831600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1138728/1666666 [1:51:12<42:14, 208.32it/s]

finished frames 6832200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1138833/1666666 [1:51:12<42:16, 208.11it/s]

finished frames 6832800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1138938/1666666 [1:51:13<42:14, 208.18it/s]

finished frames 6833400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1139022/1666666 [1:51:13<43:05, 204.08it/s]

finished frames 6834000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1139128/1666666 [1:51:14<42:11, 208.37it/s]

finished frames 6834600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1139234/1666666 [1:51:14<42:04, 208.92it/s]

finished frames 6835200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1139340/1666666 [1:51:15<42:05, 208.79it/s]

finished frames 6835800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1139426/1666666 [1:51:15<41:57, 209.40it/s]

finished frames 6836400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1139531/1666666 [1:51:16<43:23, 202.48it/s]

finished frames 6837000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1139637/1666666 [1:51:16<44:03, 199.38it/s]

finished frames 6837600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1139723/1666666 [1:51:17<42:23, 207.20it/s]

finished frames 6838200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1139830/1666666 [1:51:17<41:53, 209.57it/s]

finished frames 6838800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1139935/1666666 [1:51:18<42:07, 208.37it/s]

finished frames 6839400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1140040/1666666 [1:51:18<43:07, 203.54it/s]

finished frames 6840000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1140124/1666666 [1:51:18<42:17, 207.50it/s]

finished frames 6840600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1140231/1666666 [1:51:19<41:54, 209.40it/s]

finished frames 6841200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1140336/1666666 [1:51:19<42:09, 208.08it/s]

finished frames 6841800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1140441/1666666 [1:51:20<42:09, 208.02it/s]

finished frames 6842400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1140525/1666666 [1:51:20<42:12, 207.72it/s]

finished frames 6843000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1140630/1666666 [1:51:21<42:02, 208.58it/s]

finished frames 6843600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1140736/1666666 [1:51:21<41:56, 208.98it/s]

finished frames 6844200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1140842/1666666 [1:51:22<41:48, 209.60it/s]

finished frames 6844800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1140926/1666666 [1:51:22<41:55, 208.96it/s]

finished frames 6845400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1141032/1666666 [1:51:23<42:52, 204.33it/s]

finished frames 6846000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1141139/1666666 [1:51:23<41:49, 209.42it/s]

finished frames 6846600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1141224/1666666 [1:51:24<41:54, 208.95it/s]

finished frames 6847200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1141330/1666666 [1:51:24<41:49, 209.36it/s]

finished frames 6847800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1141436/1666666 [1:51:25<41:47, 209.43it/s]

finished frames 6848400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1141542/1666666 [1:51:25<41:47, 209.42it/s]

finished frames 6849000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 68%|██████▊   | 1141626/1666666 [1:51:26<42:03, 208.06it/s]

finished frames 6849600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▊   | 1141732/1666666 [1:51:26<42:20, 206.60it/s]

finished frames 6850200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▊   | 1141816/1666666 [1:51:27<45:45, 191.15it/s]

finished frames 6850800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▊   | 1141942/1666666 [1:51:27<42:16, 206.90it/s]

finished frames 6851400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▊   | 1142026/1666666 [1:51:28<43:09, 202.63it/s]

finished frames 6852000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▊   | 1142131/1666666 [1:51:28<42:13, 207.05it/s]

finished frames 6852600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▊   | 1142236/1666666 [1:51:29<42:05, 207.66it/s]

finished frames 6853200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▊   | 1142341/1666666 [1:51:29<42:02, 207.90it/s]

finished frames 6853800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▊   | 1142425/1666666 [1:51:30<42:12, 206.97it/s]

finished frames 6854400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▊   | 1142530/1666666 [1:51:30<42:06, 207.42it/s]

finished frames 6855000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▊   | 1142636/1666666 [1:51:31<41:58, 208.10it/s]

finished frames 6855600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▊   | 1142741/1666666 [1:51:31<41:59, 207.92it/s]

finished frames 6856200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▊   | 1142825/1666666 [1:51:32<42:12, 206.84it/s]

finished frames 6856800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▊   | 1142930/1666666 [1:51:32<42:05, 207.34it/s]

finished frames 6857400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▊   | 1143035/1666666 [1:51:33<42:58, 203.05it/s]

finished frames 6858000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▊   | 1143140/1666666 [1:51:33<42:13, 206.64it/s]

finished frames 6858600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▊   | 1143225/1666666 [1:51:33<42:01, 207.61it/s]

finished frames 6859200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▊   | 1143330/1666666 [1:51:34<42:03, 207.34it/s]

finished frames 6859800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▊   | 1143435/1666666 [1:51:34<41:56, 207.88it/s]

finished frames 6860400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▊   | 1143540/1666666 [1:51:35<42:00, 207.54it/s]

finished frames 6861000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▊   | 1143624/1666666 [1:51:35<42:04, 207.22it/s]

finished frames 6861600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▊   | 1143729/1666666 [1:51:36<41:55, 207.87it/s]

finished frames 6862200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▊   | 1143834/1666666 [1:51:36<42:04, 207.13it/s]

finished frames 6862800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▊   | 1143939/1666666 [1:51:37<41:58, 207.54it/s]

finished frames 6863400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▊   | 1144023/1666666 [1:51:37<42:55, 202.93it/s]

finished frames 6864000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▊   | 1144128/1666666 [1:51:38<42:01, 207.27it/s]

finished frames 6864600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▊   | 1144235/1666666 [1:51:38<41:38, 209.07it/s]

finished frames 6865200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▊   | 1144343/1666666 [1:51:39<42:44, 203.64it/s]

finished frames 6865800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▊   | 1144428/1666666 [1:51:39<41:53, 207.77it/s]

finished frames 6866400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▊   | 1144536/1666666 [1:51:40<41:21, 210.37it/s]

finished frames 6867000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▊   | 1144624/1666666 [1:51:40<41:24, 210.16it/s]

finished frames 6867600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▊   | 1144734/1666666 [1:51:41<40:53, 212.74it/s]

finished frames 6868200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▊   | 1144844/1666666 [1:51:41<40:47, 213.22it/s]

finished frames 6868800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▊   | 1144932/1666666 [1:51:42<40:49, 212.96it/s]

finished frames 6869400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▊   | 1145020/1666666 [1:51:42<42:03, 206.74it/s]

finished frames 6870000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▊   | 1145130/1666666 [1:51:43<40:33, 214.33it/s]

finished frames 6870600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▊   | 1145240/1666666 [1:51:43<40:22, 215.27it/s]

finished frames 6871200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▊   | 1145328/1666666 [1:51:44<40:28, 214.68it/s]

finished frames 6871800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▊   | 1145438/1666666 [1:51:44<40:29, 214.53it/s]

finished frames 6872400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▊   | 1145526/1666666 [1:51:44<40:32, 214.27it/s]

finished frames 6873000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▊   | 1145636/1666666 [1:51:45<40:28, 214.58it/s]

finished frames 6873600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▊   | 1145724/1666666 [1:51:45<40:37, 213.70it/s]

finished frames 6874200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1145834/1666666 [1:51:46<40:36, 213.76it/s]

finished frames 6874800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1145944/1666666 [1:51:46<40:31, 214.13it/s]

finished frames 6875400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1146032/1666666 [1:51:47<41:34, 208.68it/s]

finished frames 6876000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1146142/1666666 [1:51:47<40:45, 212.81it/s]

finished frames 6876600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1146230/1666666 [1:51:48<40:38, 213.44it/s]

finished frames 6877200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1146340/1666666 [1:51:48<40:32, 213.90it/s]

finished frames 6877800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1146428/1666666 [1:51:49<40:34, 213.71it/s]

finished frames 6878400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1146538/1666666 [1:51:49<40:34, 213.64it/s]

finished frames 6879000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1146626/1666666 [1:51:50<40:33, 213.70it/s]

finished frames 6879600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1146736/1666666 [1:51:50<40:31, 213.80it/s]

finished frames 6880200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1146824/1666666 [1:51:51<40:29, 213.94it/s]

finished frames 6880800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1146934/1666666 [1:51:51<40:29, 213.94it/s]

finished frames 6881400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1147022/1666666 [1:51:51<41:49, 207.10it/s]

finished frames 6882000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1147132/1666666 [1:51:52<40:47, 212.24it/s]

finished frames 6882600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1147242/1666666 [1:51:53<40:27, 214.00it/s]

finished frames 6883200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1147330/1666666 [1:51:53<40:29, 213.76it/s]

finished frames 6883800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1147440/1666666 [1:51:53<41:00, 211.00it/s]

finished frames 6884400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1147529/1666666 [1:51:54<39:56, 216.65it/s]

finished frames 6885000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1147639/1666666 [1:51:54<40:00, 216.18it/s]

finished frames 6885600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1147727/1666666 [1:51:55<40:57, 211.15it/s]

finished frames 6886200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1147837/1666666 [1:51:55<40:46, 212.06it/s]

finished frames 6886800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1147925/1666666 [1:51:56<39:56, 216.44it/s]

finished frames 6887400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1148035/1666666 [1:51:56<40:37, 212.78it/s]

finished frames 6888000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1148123/1666666 [1:51:57<40:04, 215.68it/s]

finished frames 6888600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1148234/1666666 [1:51:57<39:30, 218.73it/s]

finished frames 6889200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1148345/1666666 [1:51:58<39:43, 217.46it/s]

finished frames 6889800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1148433/1666666 [1:51:58<40:26, 213.59it/s]

finished frames 6890400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1148543/1666666 [1:51:59<40:58, 210.78it/s]

finished frames 6891000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1148632/1666666 [1:51:59<39:49, 216.82it/s]

finished frames 6891600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1148743/1666666 [1:52:00<39:41, 217.50it/s]

finished frames 6892200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1148831/1666666 [1:52:00<39:35, 218.01it/s]

finished frames 6892800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1148919/1666666 [1:52:00<40:28, 213.24it/s]

finished frames 6893400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1149027/1666666 [1:52:01<42:06, 204.88it/s]

finished frames 6894000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1149136/1666666 [1:52:01<41:47, 206.43it/s]

finished frames 6894600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1149222/1666666 [1:52:02<41:10, 209.44it/s]

finished frames 6895200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1149331/1666666 [1:52:02<41:26, 208.07it/s]

finished frames 6895800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1149438/1666666 [1:52:03<41:00, 210.19it/s]

finished frames 6896400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1149526/1666666 [1:52:03<40:44, 211.58it/s]

finished frames 6897000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1149636/1666666 [1:52:04<40:42, 211.64it/s]

finished frames 6897600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1149724/1666666 [1:52:04<40:30, 212.70it/s]

finished frames 6898200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1149833/1666666 [1:52:05<41:11, 209.14it/s]

finished frames 6898800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1149938/1666666 [1:52:05<41:27, 207.71it/s]

finished frames 6899400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1150023/1666666 [1:52:06<42:12, 204.00it/s]

finished frames 6900000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1150133/1666666 [1:52:06<40:56, 210.29it/s]

finished frames 6900600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1150243/1666666 [1:52:07<40:27, 212.75it/s]

finished frames 6901200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1150331/1666666 [1:52:07<40:34, 212.09it/s]

finished frames 6901800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1150441/1666666 [1:52:08<40:27, 212.68it/s]

finished frames 6902400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1150529/1666666 [1:52:08<40:14, 213.77it/s]

finished frames 6903000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1150638/1666666 [1:52:09<41:02, 209.59it/s]

finished frames 6903600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1150722/1666666 [1:52:09<41:24, 207.67it/s]

finished frames 6904200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1150827/1666666 [1:52:09<41:36, 206.60it/s]

finished frames 6904800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1150932/1666666 [1:52:10<41:36, 206.60it/s]

finished frames 6905400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1151037/1666666 [1:52:10<42:33, 201.97it/s]

finished frames 6906000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1151142/1666666 [1:52:11<41:49, 205.43it/s]

finished frames 6906600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1151226/1666666 [1:52:11<41:42, 205.95it/s]

finished frames 6907200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1151331/1666666 [1:52:12<43:13, 198.68it/s]

finished frames 6907800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1151437/1666666 [1:52:12<44:11, 194.32it/s]

finished frames 6908400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1151523/1666666 [1:52:13<41:46, 205.54it/s]

finished frames 6909000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1151633/1666666 [1:52:13<40:30, 211.88it/s]

finished frames 6909600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1151743/1666666 [1:52:14<40:24, 212.42it/s]

finished frames 6910200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1151831/1666666 [1:52:14<40:24, 212.38it/s]

finished frames 6910800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1151940/1666666 [1:52:15<41:00, 209.21it/s]

finished frames 6911400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1152024/1666666 [1:52:15<42:15, 202.99it/s]

finished frames 6912000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1152129/1666666 [1:52:16<41:42, 205.62it/s]

finished frames 6912600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1152234/1666666 [1:52:16<41:27, 206.77it/s]

finished frames 6913200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1152341/1666666 [1:52:17<40:57, 209.25it/s]

finished frames 6913800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1152428/1666666 [1:52:17<40:38, 210.92it/s]

finished frames 6914400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1152538/1666666 [1:52:18<40:30, 211.50it/s]

finished frames 6915000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1152626/1666666 [1:52:18<40:27, 211.76it/s]

finished frames 6915600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1152736/1666666 [1:52:19<40:20, 212.31it/s]

finished frames 6916200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1152824/1666666 [1:52:19<40:20, 212.27it/s]

finished frames 6916800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1152934/1666666 [1:52:20<40:19, 212.35it/s]

finished frames 6917400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1153022/1666666 [1:52:20<41:38, 205.60it/s]

finished frames 6918000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1153131/1666666 [1:52:21<40:43, 210.17it/s]

finished frames 6918600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1153241/1666666 [1:52:21<40:17, 212.37it/s]

finished frames 6919200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1153329/1666666 [1:52:21<40:20, 212.06it/s]

finished frames 6919800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1153439/1666666 [1:52:22<40:18, 212.19it/s]

finished frames 6920400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1153527/1666666 [1:52:22<40:17, 212.28it/s]

finished frames 6921000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1153637/1666666 [1:52:23<40:11, 212.79it/s]

finished frames 6921600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1153725/1666666 [1:52:23<41:23, 206.51it/s]

finished frames 6922200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1153834/1666666 [1:52:24<41:41, 205.02it/s]

finished frames 6922800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1153943/1666666 [1:52:24<40:26, 211.30it/s]

finished frames 6923400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1154031/1666666 [1:52:25<41:09, 207.56it/s]

finished frames 6924000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1154140/1666666 [1:52:25<40:20, 211.78it/s]

finished frames 6924600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1154228/1666666 [1:52:26<40:19, 211.82it/s]

finished frames 6925200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1154338/1666666 [1:52:26<40:16, 212.04it/s]

finished frames 6925800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1154426/1666666 [1:52:27<40:18, 211.79it/s]

finished frames 6926400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1154536/1666666 [1:52:27<40:24, 211.20it/s]

finished frames 6927000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1154624/1666666 [1:52:28<40:37, 210.07it/s]

finished frames 6927600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1154734/1666666 [1:52:28<40:15, 211.97it/s]

finished frames 6928200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1154822/1666666 [1:52:29<40:22, 211.28it/s]

finished frames 6928800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1154932/1666666 [1:52:29<40:10, 212.33it/s]

finished frames 6929400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1155020/1666666 [1:52:30<41:28, 205.61it/s]

finished frames 6930000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1155129/1666666 [1:52:30<40:23, 211.09it/s]

finished frames 6930600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1155239/1666666 [1:52:31<40:13, 211.91it/s]

finished frames 6931200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1155327/1666666 [1:52:31<40:09, 212.26it/s]

finished frames 6931800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1155437/1666666 [1:52:32<40:12, 211.90it/s]

finished frames 6932400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1155525/1666666 [1:52:32<40:06, 212.38it/s]

finished frames 6933000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1155635/1666666 [1:52:32<40:08, 212.21it/s]

finished frames 6933600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1155723/1666666 [1:52:33<40:09, 212.04it/s]

finished frames 6934200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1155833/1666666 [1:52:33<40:11, 211.84it/s]

finished frames 6934800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1155943/1666666 [1:52:34<40:04, 212.41it/s]

finished frames 6935400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1156031/1666666 [1:52:34<43:31, 195.54it/s]

finished frames 6936000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1156118/1666666 [1:52:35<40:58, 207.67it/s]

finished frames 6936600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1156227/1666666 [1:52:35<41:07, 206.90it/s]

finished frames 6937200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1156337/1666666 [1:52:36<40:17, 211.14it/s]

finished frames 6937800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1156425/1666666 [1:52:36<40:09, 211.78it/s]

finished frames 6938400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1156535/1666666 [1:52:37<40:07, 211.89it/s]

finished frames 6939000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1156623/1666666 [1:52:37<40:04, 212.15it/s]

finished frames 6939600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1156733/1666666 [1:52:38<40:00, 212.46it/s]

finished frames 6940200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1156843/1666666 [1:52:38<39:53, 212.96it/s]

finished frames 6940800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1156931/1666666 [1:52:39<40:05, 211.89it/s]

finished frames 6941400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1157041/1666666 [1:52:39<40:52, 207.80it/s]

finished frames 6942000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1157128/1666666 [1:52:40<40:11, 211.32it/s]

finished frames 6942600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1157238/1666666 [1:52:40<39:53, 212.84it/s]

finished frames 6943200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1157326/1666666 [1:52:41<39:55, 212.66it/s]

finished frames 6943800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1157436/1666666 [1:52:41<39:52, 212.81it/s]

finished frames 6944400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1157524/1666666 [1:52:41<39:46, 213.33it/s]

finished frames 6945000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1157634/1666666 [1:52:42<39:46, 213.28it/s]

finished frames 6945600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1157744/1666666 [1:52:42<39:46, 213.28it/s]

finished frames 6946200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1157832/1666666 [1:52:43<39:46, 213.20it/s]

finished frames 6946800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1157942/1666666 [1:52:43<39:43, 213.47it/s]

finished frames 6947400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1158030/1666666 [1:52:44<41:14, 205.54it/s]

finished frames 6948000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1158139/1666666 [1:52:44<39:53, 212.43it/s]

finished frames 6948600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 69%|██████▉   | 1158227/1666666 [1:52:45<39:41, 213.50it/s]

finished frames 6949200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1158337/1666666 [1:52:45<39:39, 213.61it/s]

finished frames 6949800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1158424/1666666 [1:52:46<41:52, 202.28it/s]

finished frames 6950400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1158532/1666666 [1:52:46<43:01, 196.86it/s]

finished frames 6951000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1158642/1666666 [1:52:47<40:10, 210.76it/s]

finished frames 6951600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1158730/1666666 [1:52:47<39:38, 213.56it/s]

finished frames 6952200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1158840/1666666 [1:52:48<39:30, 214.22it/s]

finished frames 6952800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1158928/1666666 [1:52:48<39:32, 214.02it/s]

finished frames 6953400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1159038/1666666 [1:52:49<40:23, 209.47it/s]

finished frames 6954000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1159126/1666666 [1:52:49<39:46, 212.63it/s]

finished frames 6954600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1159236/1666666 [1:52:50<39:47, 212.54it/s]

finished frames 6955200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1159324/1666666 [1:52:50<39:40, 213.11it/s]

finished frames 6955800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1159434/1666666 [1:52:51<39:31, 213.87it/s]

finished frames 6956400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1159522/1666666 [1:52:51<39:37, 213.35it/s]

finished frames 6957000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1159632/1666666 [1:52:51<39:31, 213.77it/s]

finished frames 6957600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1159742/1666666 [1:52:52<39:30, 213.84it/s]

finished frames 6958200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1159830/1666666 [1:52:52<39:39, 213.01it/s]

finished frames 6958800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1159940/1666666 [1:52:53<39:34, 213.42it/s]

finished frames 6959400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1160028/1666666 [1:52:53<40:30, 208.48it/s]

finished frames 6960000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1160138/1666666 [1:52:54<39:50, 211.89it/s]

finished frames 6960600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1160226/1666666 [1:52:54<39:42, 212.54it/s]

finished frames 6961200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1160336/1666666 [1:52:55<39:55, 211.35it/s]

finished frames 6961800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1160424/1666666 [1:52:55<39:48, 211.91it/s]

finished frames 6962400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1160534/1666666 [1:52:56<39:39, 212.72it/s]

finished frames 6963000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1160644/1666666 [1:52:56<39:21, 214.31it/s]

finished frames 6963600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1160732/1666666 [1:52:57<39:20, 214.32it/s]

finished frames 6964200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1160842/1666666 [1:52:57<40:24, 208.59it/s]

finished frames 6964800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1160930/1666666 [1:52:58<42:07, 200.11it/s]

finished frames 6965400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1161038/1666666 [1:52:58<40:55, 205.91it/s]

finished frames 6966000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1161125/1666666 [1:52:59<39:53, 211.18it/s]

finished frames 6966600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1161235/1666666 [1:52:59<39:26, 213.60it/s]

finished frames 6967200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1161323/1666666 [1:52:59<39:30, 213.14it/s]

finished frames 6967800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1161433/1666666 [1:53:00<40:07, 209.86it/s]

finished frames 6968400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1161541/1666666 [1:53:01<39:53, 211.04it/s]

finished frames 6969000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1161629/1666666 [1:53:01<39:53, 210.97it/s]

finished frames 6969600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1161739/1666666 [1:53:01<39:48, 211.39it/s]

finished frames 6970200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1161827/1666666 [1:53:02<39:45, 211.60it/s]

finished frames 6970800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1161937/1666666 [1:53:02<39:50, 211.13it/s]

finished frames 6971400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1162024/1666666 [1:53:03<40:47, 206.19it/s]

finished frames 6972000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1162130/1666666 [1:53:03<40:08, 209.46it/s]

finished frames 6972600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1162237/1666666 [1:53:04<40:02, 209.94it/s]

finished frames 6973200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1162323/1666666 [1:53:04<40:05, 209.65it/s]

finished frames 6973800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1162429/1666666 [1:53:05<40:07, 209.41it/s]

finished frames 6974400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1162535/1666666 [1:53:05<40:11, 209.04it/s]

finished frames 6975000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1162641/1666666 [1:53:06<40:02, 209.83it/s]

finished frames 6975600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1162726/1666666 [1:53:06<40:12, 208.93it/s]

finished frames 6976200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1162833/1666666 [1:53:07<40:07, 209.29it/s]

finished frames 6976800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1162941/1666666 [1:53:07<40:00, 209.80it/s]

finished frames 6977400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1163025/1666666 [1:53:08<40:54, 205.22it/s]

finished frames 6978000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1163133/1666666 [1:53:08<43:30, 192.89it/s]

finished frames 6978600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1163241/1666666 [1:53:09<40:18, 208.18it/s]

finished frames 6979200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1163327/1666666 [1:53:09<39:53, 210.33it/s]

finished frames 6979800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1163437/1666666 [1:53:10<39:41, 211.30it/s]

finished frames 6980400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1163525/1666666 [1:53:10<39:45, 210.91it/s]

finished frames 6981000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1163635/1666666 [1:53:11<39:45, 210.84it/s]

finished frames 6981600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1163723/1666666 [1:53:11<39:44, 210.93it/s]

finished frames 6982200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1163833/1666666 [1:53:11<39:37, 211.49it/s]

finished frames 6982800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1163943/1666666 [1:53:12<39:36, 211.50it/s]

finished frames 6983400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1164030/1666666 [1:53:12<40:36, 206.30it/s]

finished frames 6984000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1164137/1666666 [1:53:13<39:53, 209.95it/s]

finished frames 6984600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1164224/1666666 [1:53:13<39:54, 209.83it/s]

finished frames 6985200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1164334/1666666 [1:53:14<39:34, 211.55it/s]

finished frames 6985800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1164422/1666666 [1:53:14<39:35, 211.43it/s]

finished frames 6986400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1164532/1666666 [1:53:15<39:37, 211.24it/s]

finished frames 6987000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1164642/1666666 [1:53:15<39:29, 211.91it/s]

finished frames 6987600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1164730/1666666 [1:53:16<39:31, 211.66it/s]

finished frames 6988200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1164840/1666666 [1:53:16<39:26, 212.09it/s]

finished frames 6988800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1164928/1666666 [1:53:17<39:24, 212.16it/s]

finished frames 6989400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1165038/1666666 [1:53:17<40:20, 207.21it/s]

finished frames 6990000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1165125/1666666 [1:53:18<39:36, 211.00it/s]

finished frames 6990600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1165235/1666666 [1:53:18<39:23, 212.16it/s]

finished frames 6991200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1165323/1666666 [1:53:19<39:28, 211.68it/s]

finished frames 6991800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1165433/1666666 [1:53:19<39:21, 212.24it/s]

finished frames 6992400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1165543/1666666 [1:53:20<41:09, 202.90it/s]

finished frames 6993000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1165629/1666666 [1:53:20<43:29, 192.01it/s]

finished frames 6993600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1165738/1666666 [1:53:21<40:01, 208.56it/s]

finished frames 6994200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1165826/1666666 [1:53:21<39:25, 211.71it/s]

finished frames 6994800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1165936/1666666 [1:53:22<39:11, 212.91it/s]

finished frames 6995400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1166024/1666666 [1:53:22<39:54, 209.06it/s]

finished frames 6996000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1166134/1666666 [1:53:22<39:19, 212.10it/s]

finished frames 6996600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1166244/1666666 [1:53:23<39:20, 212.01it/s]

finished frames 6997200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1166332/1666666 [1:53:23<39:23, 211.66it/s]

finished frames 6997800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1166442/1666666 [1:53:24<39:21, 211.85it/s]

finished frames 6998400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1166530/1666666 [1:53:24<39:17, 212.13it/s]

finished frames 6999000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|██████▉   | 1166640/1666666 [1:53:25<39:14, 212.36it/s]

finished frames 6999600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1166728/1666666 [1:53:25<39:21, 211.66it/s]

finished frames 7000200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1166838/1666666 [1:53:26<39:21, 211.70it/s]

finished frames 7000800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1166926/1666666 [1:53:26<39:42, 209.74it/s]

finished frames 7001400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1167031/1666666 [1:53:27<40:52, 203.76it/s]

finished frames 7002000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1167138/1666666 [1:53:27<39:45, 209.38it/s]

finished frames 7002600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1167222/1666666 [1:53:28<40:09, 207.28it/s]

finished frames 7003200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1167328/1666666 [1:53:28<40:08, 207.34it/s]

finished frames 7003800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1167435/1666666 [1:53:29<39:54, 208.52it/s]

finished frames 7004400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1167541/1666666 [1:53:29<39:57, 208.22it/s]

finished frames 7005000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1167626/1666666 [1:53:30<40:03, 207.65it/s]

finished frames 7005600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1167732/1666666 [1:53:30<40:02, 207.65it/s]

finished frames 7006200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1167837/1666666 [1:53:31<39:57, 208.02it/s]

finished frames 7006800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1167923/1666666 [1:53:31<41:00, 202.70it/s]

finished frames 7007400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1168026/1666666 [1:53:32<45:38, 182.10it/s]

finished frames 7008000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1168132/1666666 [1:53:32<40:47, 203.72it/s]

finished frames 7008600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1168238/1666666 [1:53:33<40:08, 206.96it/s]

finished frames 7009200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1168322/1666666 [1:53:33<40:03, 207.34it/s]

finished frames 7009800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1168429/1666666 [1:53:34<39:56, 207.93it/s]

finished frames 7010400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1168539/1666666 [1:53:34<38:58, 213.04it/s]

finished frames 7011000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1168627/1666666 [1:53:34<38:55, 213.28it/s]

finished frames 7011600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1168737/1666666 [1:53:35<38:48, 213.80it/s]

finished frames 7012200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1168825/1666666 [1:53:35<38:53, 213.33it/s]

finished frames 7012800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1168935/1666666 [1:53:36<38:52, 213.39it/s]

finished frames 7013400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1169023/1666666 [1:53:36<39:38, 209.24it/s]

finished frames 7014000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1169133/1666666 [1:53:37<39:01, 212.51it/s]

finished frames 7014600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1169243/1666666 [1:53:37<38:49, 213.55it/s]

finished frames 7015200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1169331/1666666 [1:53:38<38:43, 214.05it/s]

finished frames 7015800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1169441/1666666 [1:53:38<38:46, 213.68it/s]

finished frames 7016400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1169529/1666666 [1:53:39<38:54, 212.94it/s]

finished frames 7017000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1169639/1666666 [1:53:39<38:51, 213.17it/s]

finished frames 7017600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1169727/1666666 [1:53:40<38:46, 213.56it/s]

finished frames 7018200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1169837/1666666 [1:53:40<38:47, 213.49it/s]

finished frames 7018800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1169925/1666666 [1:53:41<38:47, 213.42it/s]

finished frames 7019400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1170035/1666666 [1:53:41<39:35, 209.06it/s]

finished frames 7020000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1170122/1666666 [1:53:42<39:08, 211.44it/s]

finished frames 7020600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1170232/1666666 [1:53:42<41:15, 200.56it/s]

finished frames 7021200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1170319/1666666 [1:53:42<39:37, 208.80it/s]

finished frames 7021800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1170427/1666666 [1:53:43<39:55, 207.17it/s]

finished frames 7022400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1170536/1666666 [1:53:44<39:05, 211.50it/s]

finished frames 7023000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1170624/1666666 [1:53:44<39:01, 211.89it/s]

finished frames 7023600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1170734/1666666 [1:53:44<38:56, 212.25it/s]

finished frames 7024200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1170822/1666666 [1:53:45<39:02, 211.67it/s]

finished frames 7024800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1170932/1666666 [1:53:45<38:59, 211.87it/s]

finished frames 7025400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1171020/1666666 [1:53:46<40:04, 206.10it/s]

finished frames 7026000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1171130/1666666 [1:53:46<38:56, 212.10it/s]

finished frames 7026600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1171240/1666666 [1:53:47<39:01, 211.57it/s]

finished frames 7027200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1171328/1666666 [1:53:47<38:54, 212.21it/s]

finished frames 7027800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1171438/1666666 [1:53:48<38:48, 212.68it/s]

finished frames 7028400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1171526/1666666 [1:53:48<38:50, 212.49it/s]

finished frames 7029000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1171636/1666666 [1:53:49<38:52, 212.27it/s]

finished frames 7029600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1171724/1666666 [1:53:49<38:57, 211.71it/s]

finished frames 7030200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1171834/1666666 [1:53:50<38:41, 213.15it/s]

finished frames 7030800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1171922/1666666 [1:53:50<38:42, 212.99it/s]

finished frames 7031400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1172032/1666666 [1:53:51<39:30, 208.62it/s]

finished frames 7032000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1172141/1666666 [1:53:51<38:41, 213.00it/s]

finished frames 7032600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1172229/1666666 [1:53:52<38:31, 213.89it/s]

finished frames 7033200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1172339/1666666 [1:53:52<38:29, 214.08it/s]

finished frames 7033800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1172427/1666666 [1:53:52<38:33, 213.60it/s]

finished frames 7034400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1172537/1666666 [1:53:53<38:33, 213.60it/s]

finished frames 7035000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1172624/1666666 [1:53:53<39:39, 207.65it/s]

finished frames 7035600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1172734/1666666 [1:53:54<39:00, 211.08it/s]

finished frames 7036200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1172822/1666666 [1:53:54<38:42, 212.66it/s]

finished frames 7036800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1172930/1666666 [1:53:55<39:27, 208.57it/s]

finished frames 7037400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1173036/1666666 [1:53:55<40:00, 205.65it/s]

finished frames 7038000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1173124/1666666 [1:53:56<38:48, 211.94it/s]

finished frames 7038600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1173234/1666666 [1:53:56<38:25, 213.99it/s]

finished frames 7039200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1173322/1666666 [1:53:57<38:31, 213.44it/s]

finished frames 7039800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1173432/1666666 [1:53:57<38:18, 214.56it/s]

finished frames 7040400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1173542/1666666 [1:53:58<38:09, 215.43it/s]

finished frames 7041000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1173630/1666666 [1:53:58<38:51, 211.50it/s]

finished frames 7041600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1173740/1666666 [1:53:59<39:10, 209.75it/s]

finished frames 7042200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1173828/1666666 [1:53:59<38:06, 215.55it/s]

finished frames 7042800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1173938/1666666 [1:54:00<38:27, 213.52it/s]

finished frames 7043400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1174026/1666666 [1:54:00<39:12, 209.39it/s]

finished frames 7044000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1174136/1666666 [1:54:01<38:57, 210.73it/s]

finished frames 7044600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1174224/1666666 [1:54:01<39:05, 209.99it/s]

finished frames 7045200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1174332/1666666 [1:54:01<39:57, 205.33it/s]

finished frames 7045800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1174440/1666666 [1:54:02<39:07, 209.64it/s]

finished frames 7046400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1174525/1666666 [1:54:02<39:15, 208.92it/s]

finished frames 7047000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1174633/1666666 [1:54:03<38:59, 210.29it/s]

finished frames 7047600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1174741/1666666 [1:54:03<39:04, 209.81it/s]

finished frames 7048200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1174826/1666666 [1:54:04<39:14, 208.85it/s]

finished frames 7048800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 70%|███████   | 1174933/1666666 [1:54:04<39:12, 209.02it/s]

finished frames 7049400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1175037/1666666 [1:54:05<41:20, 198.16it/s]

finished frames 7050000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1175142/1666666 [1:54:05<41:21, 198.07it/s]

finished frames 7050600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1175227/1666666 [1:54:06<39:41, 206.36it/s]

finished frames 7051200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1175333/1666666 [1:54:06<39:19, 208.24it/s]

finished frames 7051800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1175439/1666666 [1:54:07<39:09, 209.08it/s]

finished frames 7052400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1175523/1666666 [1:54:07<39:13, 208.65it/s]

finished frames 7053000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1175629/1666666 [1:54:08<39:13, 208.60it/s]

finished frames 7053600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1175735/1666666 [1:54:08<39:08, 209.03it/s]

finished frames 7054200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1175842/1666666 [1:54:09<38:58, 209.90it/s]

finished frames 7054800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1175926/1666666 [1:54:09<39:06, 209.10it/s]

finished frames 7055400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1176036/1666666 [1:54:10<39:27, 207.27it/s]

finished frames 7056000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1176124/1666666 [1:54:10<38:41, 211.29it/s]

finished frames 7056600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1176234/1666666 [1:54:11<38:33, 212.03it/s]

finished frames 7057200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1176322/1666666 [1:54:11<38:27, 212.49it/s]

finished frames 7057800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1176432/1666666 [1:54:12<38:17, 213.38it/s]

finished frames 7058400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1176542/1666666 [1:54:12<38:22, 212.88it/s]

finished frames 7059000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1176630/1666666 [1:54:13<38:26, 212.50it/s]

finished frames 7059600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1176740/1666666 [1:54:13<38:24, 212.59it/s]

finished frames 7060200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1176828/1666666 [1:54:13<38:32, 211.84it/s]

finished frames 7060800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1176938/1666666 [1:54:14<38:29, 212.05it/s]

finished frames 7061400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1177025/1666666 [1:54:14<39:21, 207.31it/s]

finished frames 7062000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1177135/1666666 [1:54:15<38:31, 211.77it/s]

finished frames 7062600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1177223/1666666 [1:54:15<38:25, 212.26it/s]

finished frames 7063200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1177311/1666666 [1:54:16<38:23, 212.42it/s]

finished frames 7063800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1177442/1666666 [1:54:16<39:19, 207.37it/s]

finished frames 7064400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1177530/1666666 [1:54:17<38:22, 212.48it/s]

finished frames 7065000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1177640/1666666 [1:54:17<38:05, 213.97it/s]

finished frames 7065600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1177728/1666666 [1:54:18<38:11, 213.35it/s]

finished frames 7066200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1177838/1666666 [1:54:18<38:08, 213.62it/s]

finished frames 7066800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1177926/1666666 [1:54:19<38:09, 213.45it/s]

finished frames 7067400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1178036/1666666 [1:54:19<38:49, 209.79it/s]

finished frames 7068000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1178124/1666666 [1:54:20<38:24, 212.00it/s]

finished frames 7068600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1178234/1666666 [1:54:20<38:06, 213.64it/s]

finished frames 7069200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1178344/1666666 [1:54:21<37:53, 214.76it/s]

finished frames 7069800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1178432/1666666 [1:54:21<37:57, 214.35it/s]

finished frames 7070400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1178542/1666666 [1:54:22<37:47, 215.30it/s]

finished frames 7071000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1178630/1666666 [1:54:22<38:01, 213.91it/s]

finished frames 7071600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1178740/1666666 [1:54:22<38:15, 212.57it/s]

finished frames 7072200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1178827/1666666 [1:54:23<38:56, 208.77it/s]

finished frames 7072800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1178932/1666666 [1:54:23<39:22, 206.42it/s]

finished frames 7073400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1179037/1666666 [1:54:24<40:08, 202.44it/s]

finished frames 7074000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1179143/1666666 [1:54:24<39:08, 207.62it/s]

finished frames 7074600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1179227/1666666 [1:54:25<39:03, 208.00it/s]

finished frames 7075200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1179332/1666666 [1:54:25<39:17, 206.74it/s]

finished frames 7075800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1179437/1666666 [1:54:26<39:03, 207.86it/s]

finished frames 7076400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1179542/1666666 [1:54:26<38:58, 208.26it/s]

finished frames 7077000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1179626/1666666 [1:54:27<39:04, 207.70it/s]

finished frames 7077600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1179733/1666666 [1:54:27<40:17, 201.44it/s]

finished frames 7078200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1179819/1666666 [1:54:28<42:20, 191.64it/s]

finished frames 7078800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1179928/1666666 [1:54:28<39:03, 207.72it/s]

finished frames 7079400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1180037/1666666 [1:54:29<39:09, 207.10it/s]

finished frames 7080000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1180123/1666666 [1:54:29<38:23, 211.24it/s]

finished frames 7080600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1180233/1666666 [1:54:30<38:03, 213.02it/s]

finished frames 7081200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1180343/1666666 [1:54:30<38:21, 211.30it/s]

finished frames 7081800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1180429/1666666 [1:54:31<38:49, 208.77it/s]

finished frames 7082400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1180534/1666666 [1:54:31<38:52, 208.46it/s]

finished frames 7083000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1180639/1666666 [1:54:32<38:50, 208.57it/s]

finished frames 7083600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1180723/1666666 [1:54:32<39:01, 207.51it/s]

finished frames 7084200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1180829/1666666 [1:54:33<38:51, 208.38it/s]

finished frames 7084800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1180935/1666666 [1:54:33<38:48, 208.57it/s]

finished frames 7085400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1181040/1666666 [1:54:34<40:09, 201.57it/s]

finished frames 7086000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1181124/1666666 [1:54:34<39:18, 205.84it/s]

finished frames 7086600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1181229/1666666 [1:54:35<38:53, 208.01it/s]

finished frames 7087200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1181335/1666666 [1:54:35<38:47, 208.48it/s]

finished frames 7087800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1181440/1666666 [1:54:36<38:47, 208.45it/s]

finished frames 7088400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1181524/1666666 [1:54:36<38:53, 207.93it/s]

finished frames 7089000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1181630/1666666 [1:54:36<38:36, 209.37it/s]

finished frames 7089600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1181736/1666666 [1:54:37<38:46, 208.43it/s]

finished frames 7090200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1181841/1666666 [1:54:37<38:58, 207.35it/s]

finished frames 7090800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1181925/1666666 [1:54:38<38:58, 207.33it/s]

finished frames 7091400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1182030/1666666 [1:54:38<43:15, 186.76it/s]

finished frames 7092000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1182135/1666666 [1:54:39<39:44, 203.22it/s]

finished frames 7092600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1182240/1666666 [1:54:39<39:13, 205.82it/s]

finished frames 7093200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1182324/1666666 [1:54:40<39:09, 206.14it/s]

finished frames 7093800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1182429/1666666 [1:54:40<39:07, 206.27it/s]

finished frames 7094400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1182534/1666666 [1:54:41<39:00, 206.87it/s]

finished frames 7095000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1182639/1666666 [1:54:41<39:03, 206.52it/s]

finished frames 7095600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1182723/1666666 [1:54:42<39:05, 206.37it/s]

finished frames 7096200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1182828/1666666 [1:54:42<39:02, 206.57it/s]

finished frames 7096800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1182933/1666666 [1:54:43<39:00, 206.70it/s]

finished frames 7097400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1183038/1666666 [1:54:43<39:46, 202.65it/s]

finished frames 7098000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1183122/1666666 [1:54:44<39:16, 205.22it/s]

finished frames 7098600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1183227/1666666 [1:54:44<38:38, 208.48it/s]

finished frames 7099200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1183332/1666666 [1:54:45<38:47, 207.68it/s]

finished frames 7099800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1183437/1666666 [1:54:45<38:46, 207.68it/s]

finished frames 7100400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1183542/1666666 [1:54:46<38:46, 207.62it/s]

finished frames 7101000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1183626/1666666 [1:54:46<38:47, 207.57it/s]

finished frames 7101600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1183731/1666666 [1:54:47<39:52, 201.85it/s]

finished frames 7102200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1183836/1666666 [1:54:47<39:02, 206.13it/s]

finished frames 7102800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1183941/1666666 [1:54:48<38:46, 207.51it/s]

finished frames 7103400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1184025/1666666 [1:54:48<39:34, 203.22it/s]

finished frames 7104000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1184130/1666666 [1:54:49<38:57, 206.39it/s]

finished frames 7104600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1184237/1666666 [1:54:49<38:30, 208.84it/s]

finished frames 7105200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1184343/1666666 [1:54:50<38:29, 208.82it/s]

finished frames 7105800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1184427/1666666 [1:54:50<38:38, 208.02it/s]

finished frames 7106400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1184532/1666666 [1:54:51<38:38, 207.95it/s]

finished frames 7107000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1184637/1666666 [1:54:51<38:35, 208.20it/s]

finished frames 7107600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1184743/1666666 [1:54:52<38:30, 208.62it/s]

finished frames 7108200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1184827/1666666 [1:54:52<38:35, 208.07it/s]

finished frames 7108800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1184932/1666666 [1:54:52<38:34, 208.15it/s]

finished frames 7109400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1185037/1666666 [1:54:53<39:28, 203.34it/s]

finished frames 7110000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1185142/1666666 [1:54:54<38:45, 207.03it/s]

finished frames 7110600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1185227/1666666 [1:54:54<38:29, 208.44it/s]

finished frames 7111200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1185332/1666666 [1:54:54<38:31, 208.25it/s]

finished frames 7111800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1185437/1666666 [1:54:55<38:39, 207.48it/s]

finished frames 7112400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1185542/1666666 [1:54:55<38:37, 207.60it/s]

finished frames 7113000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1185626/1666666 [1:54:56<38:36, 207.64it/s]

finished frames 7113600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1185731/1666666 [1:54:56<38:32, 207.93it/s]

finished frames 7114200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1185836/1666666 [1:54:57<38:25, 208.52it/s]

finished frames 7114800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1185942/1666666 [1:54:57<38:24, 208.64it/s]

finished frames 7115400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1186026/1666666 [1:54:58<39:20, 203.61it/s]

finished frames 7116000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1186131/1666666 [1:54:58<38:40, 207.12it/s]

finished frames 7116600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1186236/1666666 [1:54:59<38:38, 207.23it/s]

finished frames 7117200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1186343/1666666 [1:54:59<38:25, 208.33it/s]

finished frames 7117800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1186429/1666666 [1:55:00<38:22, 208.53it/s]

finished frames 7118400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1186536/1666666 [1:55:00<38:17, 209.01it/s]

finished frames 7119000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1186641/1666666 [1:55:01<38:24, 208.26it/s]

finished frames 7119600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1186726/1666666 [1:55:01<38:18, 208.82it/s]

finished frames 7120200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1186831/1666666 [1:55:02<40:53, 195.58it/s]

finished frames 7120800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1186936/1666666 [1:55:02<38:51, 205.77it/s]

finished frames 7121400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1187020/1666666 [1:55:03<39:41, 201.42it/s]

finished frames 7122000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1187126/1666666 [1:55:03<38:32, 207.41it/s]

finished frames 7122600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1187231/1666666 [1:55:04<38:32, 207.34it/s]

finished frames 7123200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1187336/1666666 [1:55:04<38:24, 207.96it/s]

finished frames 7123800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████   | 1187441/1666666 [1:55:05<38:31, 207.30it/s]

finished frames 7124400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████▏  | 1187525/1666666 [1:55:05<38:28, 207.59it/s]

finished frames 7125000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████▏  | 1187631/1666666 [1:55:06<38:20, 208.19it/s]

finished frames 7125600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████▏  | 1187736/1666666 [1:55:06<38:25, 207.75it/s]

finished frames 7126200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████▏  | 1187841/1666666 [1:55:07<38:17, 208.37it/s]

finished frames 7126800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████▏  | 1187925/1666666 [1:55:07<38:21, 208.02it/s]

finished frames 7127400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████▏  | 1188030/1666666 [1:55:07<39:23, 202.48it/s]

finished frames 7128000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████▏  | 1188138/1666666 [1:55:08<38:09, 209.04it/s]

finished frames 7128600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████▏  | 1188223/1666666 [1:55:08<38:19, 208.04it/s]

finished frames 7129200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████▏  | 1188328/1666666 [1:55:09<38:18, 208.13it/s]

finished frames 7129800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████▏  | 1188433/1666666 [1:55:09<38:16, 208.20it/s]

finished frames 7130400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████▏  | 1188538/1666666 [1:55:10<38:13, 208.43it/s]

finished frames 7131000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████▏  | 1188643/1666666 [1:55:10<38:11, 208.60it/s]

finished frames 7131600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████▏  | 1188728/1666666 [1:55:11<38:13, 208.39it/s]

finished frames 7132200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████▏  | 1188833/1666666 [1:55:11<38:14, 208.27it/s]

finished frames 7132800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████▏  | 1188938/1666666 [1:55:12<38:18, 207.83it/s]

finished frames 7133400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████▏  | 1189022/1666666 [1:55:12<42:29, 187.35it/s]

finished frames 7134000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████▏  | 1189127/1666666 [1:55:13<38:55, 204.43it/s]

finished frames 7134600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████▏  | 1189232/1666666 [1:55:13<38:20, 207.49it/s]

finished frames 7135200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████▏  | 1189337/1666666 [1:55:14<38:13, 208.08it/s]

finished frames 7135800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████▏  | 1189442/1666666 [1:55:14<38:07, 208.64it/s]

finished frames 7136400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████▏  | 1189526/1666666 [1:55:15<38:16, 207.77it/s]

finished frames 7137000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████▏  | 1189631/1666666 [1:55:15<38:12, 208.09it/s]

finished frames 7137600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████▏  | 1189736/1666666 [1:55:16<38:12, 208.05it/s]

finished frames 7138200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████▏  | 1189842/1666666 [1:55:16<38:09, 208.30it/s]

finished frames 7138800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████▏  | 1189926/1666666 [1:55:17<38:11, 208.00it/s]

finished frames 7139400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████▏  | 1190031/1666666 [1:55:17<38:54, 204.16it/s]

finished frames 7140000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████▏  | 1190138/1666666 [1:55:18<38:04, 208.58it/s]

finished frames 7140600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████▏  | 1190243/1666666 [1:55:18<38:07, 208.30it/s]

finished frames 7141200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████▏  | 1190327/1666666 [1:55:19<38:10, 207.93it/s]

finished frames 7141800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████▏  | 1190433/1666666 [1:55:19<38:01, 208.78it/s]

finished frames 7142400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████▏  | 1190538/1666666 [1:55:20<38:04, 208.37it/s]

finished frames 7143000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████▏  | 1190622/1666666 [1:55:20<38:16, 207.29it/s]

finished frames 7143600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████▏  | 1190727/1666666 [1:55:20<37:59, 208.79it/s]

finished frames 7144200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████▏  | 1190832/1666666 [1:55:21<38:03, 208.36it/s]

finished frames 7144800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████▏  | 1190940/1666666 [1:55:22<37:32, 211.24it/s]

finished frames 7145400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████▏  | 1191027/1666666 [1:55:22<38:39, 205.07it/s]

finished frames 7146000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████▏  | 1191132/1666666 [1:55:22<38:20, 206.73it/s]

finished frames 7146600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████▏  | 1191237/1666666 [1:55:23<38:11, 207.45it/s]

finished frames 7147200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████▏  | 1191342/1666666 [1:55:23<38:06, 207.86it/s]

finished frames 7147800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████▏  | 1191427/1666666 [1:55:24<39:13, 201.93it/s]

finished frames 7148400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████▏  | 1191532/1666666 [1:55:24<40:08, 197.29it/s]

finished frames 7149000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 71%|███████▏  | 1191637/1666666 [1:55:25<38:27, 205.86it/s]

finished frames 7149600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1191742/1666666 [1:55:25<38:09, 207.45it/s]

finished frames 7150200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1191826/1666666 [1:55:26<38:19, 206.46it/s]

finished frames 7150800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1191931/1666666 [1:55:26<38:08, 207.47it/s]

finished frames 7151400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1192036/1666666 [1:55:27<39:06, 202.27it/s]

finished frames 7152000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1192141/1666666 [1:55:27<38:14, 206.79it/s]

finished frames 7152600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1192228/1666666 [1:55:28<37:47, 209.27it/s]

finished frames 7153200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1192338/1666666 [1:55:28<37:11, 212.54it/s]

finished frames 7153800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1192426/1666666 [1:55:29<37:00, 213.53it/s]

finished frames 7154400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1192536/1666666 [1:55:29<36:54, 214.12it/s]

finished frames 7155000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1192624/1666666 [1:55:30<36:59, 213.55it/s]

finished frames 7155600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1192734/1666666 [1:55:30<36:58, 213.64it/s]

finished frames 7156200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1192844/1666666 [1:55:31<36:55, 213.87it/s]

finished frames 7156800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1192932/1666666 [1:55:31<36:58, 213.51it/s]

finished frames 7157400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1193042/1666666 [1:55:32<37:34, 210.12it/s]

finished frames 7158000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1193130/1666666 [1:55:32<37:07, 212.56it/s]

finished frames 7158600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1193240/1666666 [1:55:33<36:58, 213.42it/s]

finished frames 7159200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1193328/1666666 [1:55:33<36:59, 213.28it/s]

finished frames 7159800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1193438/1666666 [1:55:33<37:04, 212.76it/s]

finished frames 7160400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1193526/1666666 [1:55:34<37:01, 213.03it/s]

finished frames 7161000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1193636/1666666 [1:55:34<37:01, 212.91it/s]

finished frames 7161600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1193724/1666666 [1:55:35<40:17, 195.66it/s]

finished frames 7162200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1193832/1666666 [1:55:35<40:56, 192.50it/s]

finished frames 7162800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1193942/1666666 [1:55:36<37:32, 209.86it/s]

finished frames 7163400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1194030/1666666 [1:55:36<38:06, 206.67it/s]

finished frames 7164000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1194139/1666666 [1:55:37<37:21, 210.82it/s]

finished frames 7164600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1194227/1666666 [1:55:37<37:14, 211.41it/s]

finished frames 7165200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1194337/1666666 [1:55:38<37:05, 212.26it/s]

finished frames 7165800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1194425/1666666 [1:55:38<37:01, 212.54it/s]

finished frames 7166400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1194535/1666666 [1:55:39<37:06, 212.04it/s]

finished frames 7167000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1194623/1666666 [1:55:39<37:07, 211.95it/s]

finished frames 7167600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1194733/1666666 [1:55:40<36:59, 212.60it/s]

finished frames 7168200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1194843/1666666 [1:55:40<36:44, 214.00it/s]

finished frames 7168800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1194931/1666666 [1:55:41<36:45, 213.86it/s]

finished frames 7169400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1195041/1666666 [1:55:41<37:26, 209.97it/s]

finished frames 7170000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1195129/1666666 [1:55:42<36:57, 212.66it/s]

finished frames 7170600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1195239/1666666 [1:55:42<36:46, 213.62it/s]

finished frames 7171200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1195327/1666666 [1:55:42<36:58, 212.50it/s]

finished frames 7171800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1195437/1666666 [1:55:43<36:54, 212.83it/s]

finished frames 7172400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1195525/1666666 [1:55:43<36:59, 212.27it/s]

finished frames 7173000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1195635/1666666 [1:55:44<36:53, 212.82it/s]

finished frames 7173600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1195723/1666666 [1:55:44<36:51, 212.96it/s]

finished frames 7174200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1195833/1666666 [1:55:45<36:53, 212.75it/s]

finished frames 7174800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1195943/1666666 [1:55:45<36:57, 212.24it/s]

finished frames 7175400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1196031/1666666 [1:55:46<37:46, 207.65it/s]

finished frames 7176000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1196140/1666666 [1:55:46<38:36, 203.14it/s]

finished frames 7176600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1196227/1666666 [1:55:47<37:20, 210.00it/s]

finished frames 7177200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1196337/1666666 [1:55:47<36:39, 213.79it/s]

finished frames 7177800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1196425/1666666 [1:55:48<36:38, 213.94it/s]

finished frames 7178400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1196535/1666666 [1:55:48<36:46, 213.08it/s]

finished frames 7179000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1196623/1666666 [1:55:49<36:52, 212.43it/s]

finished frames 7179600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1196733/1666666 [1:55:49<36:46, 212.96it/s]

finished frames 7180200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1196843/1666666 [1:55:50<36:42, 213.32it/s]

finished frames 7180800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1196931/1666666 [1:55:50<36:45, 212.96it/s]

finished frames 7181400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1197019/1666666 [1:55:50<39:02, 200.49it/s]

finished frames 7182000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1197124/1666666 [1:55:51<37:57, 206.19it/s]

finished frames 7182600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1197229/1666666 [1:55:51<37:34, 208.27it/s]

finished frames 7183200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1197334/1666666 [1:55:52<37:38, 207.81it/s]

finished frames 7183800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1197439/1666666 [1:55:53<37:42, 207.40it/s]

finished frames 7184400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1197523/1666666 [1:55:53<37:44, 207.21it/s]

finished frames 7185000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1197628/1666666 [1:55:53<38:05, 205.24it/s]

finished frames 7185600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1197737/1666666 [1:55:54<36:52, 211.94it/s]

finished frames 7186200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1197824/1666666 [1:55:54<37:12, 210.01it/s]

finished frames 7186800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1197931/1666666 [1:55:55<37:57, 205.78it/s]

finished frames 7187400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1198036/1666666 [1:55:55<38:40, 201.98it/s]

finished frames 7188000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1198124/1666666 [1:55:56<37:10, 210.06it/s]

finished frames 7188600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1198234/1666666 [1:55:56<36:37, 213.18it/s]

finished frames 7189200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1198322/1666666 [1:55:57<36:59, 210.97it/s]

finished frames 7189800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1198428/1666666 [1:55:57<38:25, 203.08it/s]

finished frames 7190400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1198533/1666666 [1:55:58<39:00, 200.02it/s]

finished frames 7191000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1198636/1666666 [1:55:58<39:37, 196.86it/s]

finished frames 7191600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1198736/1666666 [1:55:59<39:47, 195.95it/s]

finished frames 7192200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1198841/1666666 [1:55:59<38:34, 202.09it/s]

finished frames 7192800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1198925/1666666 [1:56:00<38:28, 202.63it/s]

finished frames 7193400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1199033/1666666 [1:56:00<38:35, 201.95it/s]

finished frames 7194000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1199143/1666666 [1:56:01<36:42, 212.29it/s]

finished frames 7194600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1199231/1666666 [1:56:01<36:53, 211.14it/s]

finished frames 7195200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1199341/1666666 [1:56:02<36:47, 211.69it/s]

finished frames 7195800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1199428/1666666 [1:56:02<37:19, 208.66it/s]

finished frames 7196400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1199533/1666666 [1:56:03<37:35, 207.11it/s]

finished frames 7197000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1199638/1666666 [1:56:03<37:36, 206.96it/s]

finished frames 7197600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1199722/1666666 [1:56:04<37:40, 206.54it/s]

finished frames 7198200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1199827/1666666 [1:56:04<37:44, 206.17it/s]

finished frames 7198800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1199933/1666666 [1:56:05<37:29, 207.45it/s]

finished frames 7199400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1200039/1666666 [1:56:05<38:05, 204.16it/s]

finished frames 7200000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1200123/1666666 [1:56:06<37:48, 205.69it/s]

finished frames 7200600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1200228/1666666 [1:56:06<37:39, 206.47it/s]

finished frames 7201200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1200333/1666666 [1:56:07<37:35, 206.79it/s]

finished frames 7201800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1200438/1666666 [1:56:07<37:31, 207.09it/s]

finished frames 7202400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1200522/1666666 [1:56:07<37:43, 205.98it/s]

finished frames 7203000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1200628/1666666 [1:56:08<37:14, 208.52it/s]

finished frames 7203600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1200733/1666666 [1:56:08<37:29, 207.09it/s]

finished frames 7204200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1200838/1666666 [1:56:09<38:44, 200.43it/s]

finished frames 7204800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1200922/1666666 [1:56:09<40:13, 193.01it/s]

finished frames 7205400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1201027/1666666 [1:56:10<38:31, 201.47it/s]

finished frames 7206000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1201132/1666666 [1:56:11<37:48, 205.18it/s]

finished frames 7206600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1201237/1666666 [1:56:11<37:44, 205.51it/s]

finished frames 7207200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1201322/1666666 [1:56:11<37:09, 208.67it/s]

finished frames 7207800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1201432/1666666 [1:56:12<36:44, 211.05it/s]

finished frames 7208400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1201538/1666666 [1:56:12<37:17, 207.92it/s]

finished frames 7209000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1201622/1666666 [1:56:13<37:36, 206.08it/s]

finished frames 7209600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1201727/1666666 [1:56:13<37:43, 205.40it/s]

finished frames 7210200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1201832/1666666 [1:56:14<37:38, 205.85it/s]

finished frames 7210800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1201937/1666666 [1:56:14<37:43, 205.34it/s]

finished frames 7211400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1202021/1666666 [1:56:15<38:55, 198.98it/s]

finished frames 7212000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1202126/1666666 [1:56:15<37:36, 205.91it/s]

finished frames 7212600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1202231/1666666 [1:56:16<37:35, 205.88it/s]

finished frames 7213200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1202336/1666666 [1:56:16<37:29, 206.43it/s]

finished frames 7213800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1202441/1666666 [1:56:17<37:32, 206.09it/s]

finished frames 7214400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1202525/1666666 [1:56:17<37:36, 205.72it/s]

finished frames 7215000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1202630/1666666 [1:56:18<37:35, 205.73it/s]

finished frames 7215600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1202735/1666666 [1:56:18<37:33, 205.91it/s]

finished frames 7216200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1202840/1666666 [1:56:19<37:20, 207.06it/s]

finished frames 7216800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1202924/1666666 [1:56:19<37:33, 205.75it/s]

finished frames 7217400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1203029/1666666 [1:56:20<38:27, 200.93it/s]

finished frames 7218000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1203134/1666666 [1:56:20<39:38, 194.91it/s]

finished frames 7218600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1203239/1666666 [1:56:21<39:16, 196.68it/s]

finished frames 7219200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1203323/1666666 [1:56:21<37:59, 203.28it/s]

finished frames 7219800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1203432/1666666 [1:56:22<36:41, 210.40it/s]

finished frames 7220400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1203542/1666666 [1:56:22<36:15, 212.87it/s]

finished frames 7221000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1203630/1666666 [1:56:23<36:21, 212.24it/s]

finished frames 7221600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1203740/1666666 [1:56:23<36:14, 212.88it/s]

finished frames 7222200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1203828/1666666 [1:56:24<36:12, 213.02it/s]

finished frames 7222800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1203938/1666666 [1:56:24<36:10, 213.15it/s]

finished frames 7223400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1204026/1666666 [1:56:25<37:03, 208.04it/s]

finished frames 7224000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1204136/1666666 [1:56:25<36:15, 212.65it/s]

finished frames 7224600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1204224/1666666 [1:56:26<36:16, 212.49it/s]

finished frames 7225200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1204334/1666666 [1:56:26<36:12, 212.84it/s]

finished frames 7225800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1204444/1666666 [1:56:27<36:10, 212.99it/s]

finished frames 7226400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1204532/1666666 [1:56:27<36:28, 211.20it/s]

finished frames 7227000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1204640/1666666 [1:56:27<36:52, 208.82it/s]

finished frames 7227600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1204724/1666666 [1:56:28<37:05, 207.59it/s]

finished frames 7228200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1204829/1666666 [1:56:28<37:08, 207.22it/s]

finished frames 7228800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1204934/1666666 [1:56:29<37:01, 207.82it/s]

finished frames 7229400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1205039/1666666 [1:56:29<37:52, 203.12it/s]

finished frames 7230000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1205123/1666666 [1:56:30<37:27, 205.37it/s]

finished frames 7230600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1205229/1666666 [1:56:30<37:07, 207.15it/s]

finished frames 7231200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1205334/1666666 [1:56:31<37:04, 207.43it/s]

finished frames 7231800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1205418/1666666 [1:56:31<40:21, 190.46it/s]

finished frames 7232400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1205523/1666666 [1:56:32<40:41, 188.85it/s]

finished frames 7233000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1205631/1666666 [1:56:32<37:02, 207.47it/s]

finished frames 7233600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1205741/1666666 [1:56:33<36:23, 211.11it/s]

finished frames 7234200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1205829/1666666 [1:56:33<36:16, 211.77it/s]

finished frames 7234800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1205939/1666666 [1:56:34<36:09, 212.33it/s]

finished frames 7235400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1206026/1666666 [1:56:34<37:08, 206.70it/s]

finished frames 7236000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1206136/1666666 [1:56:35<36:14, 211.81it/s]

finished frames 7236600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1206224/1666666 [1:56:35<36:10, 212.14it/s]

finished frames 7237200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1206334/1666666 [1:56:36<36:05, 212.55it/s]

finished frames 7237800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1206422/1666666 [1:56:36<36:05, 212.57it/s]

finished frames 7238400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1206532/1666666 [1:56:37<36:01, 212.85it/s]

finished frames 7239000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1206642/1666666 [1:56:37<35:54, 213.49it/s]

finished frames 7239600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1206730/1666666 [1:56:38<35:55, 213.39it/s]

finished frames 7240200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1206840/1666666 [1:56:38<35:54, 213.44it/s]

finished frames 7240800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1206928/1666666 [1:56:38<36:05, 212.31it/s]

finished frames 7241400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1207038/1666666 [1:56:39<36:55, 207.44it/s]

finished frames 7242000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1207124/1666666 [1:56:39<36:25, 210.27it/s]

finished frames 7242600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1207234/1666666 [1:56:40<36:07, 211.93it/s]

finished frames 7243200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1207322/1666666 [1:56:40<36:11, 211.57it/s]

finished frames 7243800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1207432/1666666 [1:56:41<36:07, 211.84it/s]

finished frames 7244400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1207542/1666666 [1:56:41<36:08, 211.77it/s]

finished frames 7245000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1207630/1666666 [1:56:42<36:07, 211.82it/s]

finished frames 7245600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1207740/1666666 [1:56:42<36:25, 210.01it/s]

finished frames 7246200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1207827/1666666 [1:56:43<37:38, 203.17it/s]

finished frames 7246800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1207937/1666666 [1:56:43<36:01, 212.20it/s]

finished frames 7247400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1208025/1666666 [1:56:44<36:30, 209.38it/s]

finished frames 7248000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1208135/1666666 [1:56:44<35:51, 213.08it/s]

finished frames 7248600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 72%|███████▏  | 1208223/1666666 [1:56:45<35:35, 214.66it/s]

finished frames 7249200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1208333/1666666 [1:56:45<35:38, 214.36it/s]

finished frames 7249800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1208443/1666666 [1:56:46<35:46, 213.51it/s]

finished frames 7250400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1208531/1666666 [1:56:46<35:42, 213.79it/s]

finished frames 7251000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1208641/1666666 [1:56:47<35:39, 214.08it/s]

finished frames 7251600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1208729/1666666 [1:56:47<35:47, 213.25it/s]

finished frames 7252200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1208839/1666666 [1:56:47<35:38, 214.06it/s]

finished frames 7252800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1208927/1666666 [1:56:48<35:40, 213.89it/s]

finished frames 7253400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1209037/1666666 [1:56:48<36:19, 209.99it/s]

finished frames 7254000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1209125/1666666 [1:56:49<35:44, 213.39it/s]

finished frames 7254600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1209235/1666666 [1:56:49<35:19, 215.85it/s]

finished frames 7255200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1209323/1666666 [1:56:50<35:27, 215.00it/s]

finished frames 7255800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1209433/1666666 [1:56:50<35:30, 214.62it/s]

finished frames 7256400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1209543/1666666 [1:56:51<36:04, 211.20it/s]

finished frames 7257000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1209629/1666666 [1:56:51<36:25, 209.10it/s]

finished frames 7257600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1209736/1666666 [1:56:52<36:22, 209.33it/s]

finished frames 7258200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1209843/1666666 [1:56:52<36:23, 209.22it/s]

finished frames 7258800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1209928/1666666 [1:56:53<36:36, 207.95it/s]

finished frames 7259400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1210034/1666666 [1:56:53<37:04, 205.30it/s]

finished frames 7260000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1210144/1666666 [1:56:54<35:39, 213.34it/s]

finished frames 7260600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1210232/1666666 [1:56:54<36:38, 207.57it/s]

finished frames 7261200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1210342/1666666 [1:56:55<37:20, 203.67it/s]

finished frames 7261800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1210429/1666666 [1:56:55<36:08, 210.35it/s]

finished frames 7262400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1210539/1666666 [1:56:56<35:41, 213.02it/s]

finished frames 7263000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1210627/1666666 [1:56:56<35:38, 213.29it/s]

finished frames 7263600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1210737/1666666 [1:56:57<35:39, 213.10it/s]

finished frames 7264200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1210825/1666666 [1:56:57<35:37, 213.21it/s]

finished frames 7264800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1210935/1666666 [1:56:57<35:31, 213.79it/s]

finished frames 7265400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1211023/1666666 [1:56:58<36:24, 208.55it/s]

finished frames 7266000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1211133/1666666 [1:56:58<35:37, 213.14it/s]

finished frames 7266600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1211243/1666666 [1:56:59<35:38, 212.98it/s]

finished frames 7267200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1211331/1666666 [1:56:59<35:36, 213.08it/s]

finished frames 7267800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1211441/1666666 [1:57:00<35:33, 213.42it/s]

finished frames 7268400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1211529/1666666 [1:57:00<35:33, 213.34it/s]

finished frames 7269000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1211639/1666666 [1:57:01<35:29, 213.65it/s]

finished frames 7269600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1211727/1666666 [1:57:01<35:25, 214.00it/s]

finished frames 7270200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1211837/1666666 [1:57:02<35:27, 213.79it/s]

finished frames 7270800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1211925/1666666 [1:57:02<35:28, 213.59it/s]

finished frames 7271400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1212035/1666666 [1:57:03<36:32, 207.39it/s]

finished frames 7272000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1212143/1666666 [1:57:03<35:50, 211.32it/s]

finished frames 7272600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1212231/1666666 [1:57:04<35:43, 212.00it/s]

finished frames 7273200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1212341/1666666 [1:57:04<35:38, 212.42it/s]

finished frames 7273800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1212429/1666666 [1:57:04<35:43, 211.87it/s]

finished frames 7274400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1212539/1666666 [1:57:05<35:33, 212.84it/s]

finished frames 7275000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1212627/1666666 [1:57:05<36:29, 207.37it/s]

finished frames 7275600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1212736/1666666 [1:57:06<36:40, 206.29it/s]

finished frames 7276200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1212824/1666666 [1:57:06<35:47, 211.29it/s]

finished frames 7276800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1212934/1666666 [1:57:07<35:23, 213.68it/s]

finished frames 7277400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1213022/1666666 [1:57:07<36:38, 206.34it/s]

finished frames 7278000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1213131/1666666 [1:57:08<35:42, 211.71it/s]

finished frames 7278600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1213241/1666666 [1:57:08<35:31, 212.71it/s]

finished frames 7279200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1213329/1666666 [1:57:09<35:35, 212.24it/s]

finished frames 7279800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1213439/1666666 [1:57:09<35:33, 212.40it/s]

finished frames 7280400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1213527/1666666 [1:57:10<35:38, 211.87it/s]

finished frames 7281000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1213637/1666666 [1:57:10<35:32, 212.46it/s]

finished frames 7281600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1213725/1666666 [1:57:11<35:29, 212.66it/s]

finished frames 7282200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1213835/1666666 [1:57:11<35:16, 213.96it/s]

finished frames 7282800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1213923/1666666 [1:57:12<35:27, 212.84it/s]

finished frames 7283400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1214033/1666666 [1:57:12<36:08, 208.75it/s]

finished frames 7284000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1214142/1666666 [1:57:13<35:31, 212.27it/s]

finished frames 7284600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1214230/1666666 [1:57:13<35:27, 212.63it/s]

finished frames 7285200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1214340/1666666 [1:57:14<35:24, 212.89it/s]

finished frames 7285800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1214428/1666666 [1:57:14<35:30, 212.25it/s]

finished frames 7286400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1214538/1666666 [1:57:15<35:22, 212.97it/s]

finished frames 7287000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1214626/1666666 [1:57:15<35:26, 212.55it/s]

finished frames 7287600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1214736/1666666 [1:57:15<35:30, 212.14it/s]

finished frames 7288200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1214824/1666666 [1:57:16<35:24, 212.68it/s]

finished frames 7288800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1214934/1666666 [1:57:16<38:13, 196.92it/s]

finished frames 7289400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1215020/1666666 [1:57:17<37:22, 201.41it/s]

finished frames 7290000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1215125/1666666 [1:57:17<36:40, 205.19it/s]

finished frames 7290600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1215230/1666666 [1:57:18<36:28, 206.27it/s]

finished frames 7291200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1215337/1666666 [1:57:18<36:05, 208.42it/s]

finished frames 7291800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1215423/1666666 [1:57:19<35:57, 209.16it/s]

finished frames 7292400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1215531/1666666 [1:57:19<35:48, 209.93it/s]

finished frames 7293000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1215641/1666666 [1:57:20<35:28, 211.94it/s]

finished frames 7293600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1215729/1666666 [1:57:20<35:36, 211.11it/s]

finished frames 7294200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1215839/1666666 [1:57:21<35:27, 211.93it/s]

finished frames 7294800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1215927/1666666 [1:57:21<35:36, 210.93it/s]

finished frames 7295400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1216037/1666666 [1:57:22<36:14, 207.28it/s]

finished frames 7296000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1216124/1666666 [1:57:22<35:45, 210.02it/s]

finished frames 7296600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1216234/1666666 [1:57:23<35:30, 211.41it/s]

finished frames 7297200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1216322/1666666 [1:57:23<35:31, 211.31it/s]

finished frames 7297800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1216432/1666666 [1:57:24<35:16, 212.76it/s]

finished frames 7298400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1216542/1666666 [1:57:24<34:53, 214.97it/s]

finished frames 7299000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1216630/1666666 [1:57:24<34:56, 214.67it/s]

finished frames 7299600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1216740/1666666 [1:57:25<34:51, 215.09it/s]

finished frames 7300200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1216828/1666666 [1:57:25<34:57, 214.42it/s]

finished frames 7300800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1216938/1666666 [1:57:26<34:56, 214.54it/s]

finished frames 7301400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1217026/1666666 [1:57:26<35:41, 209.98it/s]

finished frames 7302000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1217136/1666666 [1:57:27<35:18, 212.16it/s]

finished frames 7302600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1217224/1666666 [1:57:27<35:18, 212.14it/s]

finished frames 7303200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1217334/1666666 [1:57:28<38:20, 195.36it/s]

finished frames 7303800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1217421/1666666 [1:57:28<39:09, 191.20it/s]

finished frames 7304400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1217530/1666666 [1:57:29<35:55, 208.37it/s]

finished frames 7305000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1217640/1666666 [1:57:29<35:23, 211.47it/s]

finished frames 7305600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1217728/1666666 [1:57:30<35:22, 211.49it/s]

finished frames 7306200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1217838/1666666 [1:57:30<35:14, 212.24it/s]

finished frames 7306800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1217926/1666666 [1:57:31<35:13, 212.28it/s]

finished frames 7307400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1218036/1666666 [1:57:31<35:57, 207.95it/s]

finished frames 7308000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1218124/1666666 [1:57:32<35:25, 211.01it/s]

finished frames 7308600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1218234/1666666 [1:57:32<35:17, 211.73it/s]

finished frames 7309200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1218344/1666666 [1:57:33<35:04, 213.01it/s]

finished frames 7309800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1218432/1666666 [1:57:33<35:10, 212.37it/s]

finished frames 7310400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1218542/1666666 [1:57:34<35:00, 213.31it/s]

finished frames 7311000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1218630/1666666 [1:57:34<35:07, 212.54it/s]

finished frames 7311600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1218740/1666666 [1:57:35<35:11, 212.16it/s]

finished frames 7312200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1218828/1666666 [1:57:35<35:16, 211.55it/s]

finished frames 7312800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1218938/1666666 [1:57:35<35:14, 211.76it/s]

finished frames 7313400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1219024/1666666 [1:57:36<37:06, 201.06it/s]

finished frames 7314000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1219133/1666666 [1:57:36<35:20, 211.02it/s]

finished frames 7314600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1219243/1666666 [1:57:37<35:05, 212.51it/s]

finished frames 7315200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1219331/1666666 [1:57:37<35:04, 212.57it/s]

finished frames 7315800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1219441/1666666 [1:57:38<35:02, 212.73it/s]

finished frames 7316400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1219529/1666666 [1:57:38<35:13, 211.57it/s]

finished frames 7317000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1219639/1666666 [1:57:39<35:05, 212.32it/s]

finished frames 7317600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1219727/1666666 [1:57:39<37:06, 200.71it/s]

finished frames 7318200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1219836/1666666 [1:57:40<35:36, 209.09it/s]

finished frames 7318800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1219924/1666666 [1:57:40<35:15, 211.13it/s]

finished frames 7319400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1220034/1666666 [1:57:41<36:00, 206.69it/s]

finished frames 7320000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1220142/1666666 [1:57:41<35:14, 211.18it/s]

finished frames 7320600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1220230/1666666 [1:57:42<35:09, 211.64it/s]

finished frames 7321200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1220340/1666666 [1:57:42<34:55, 212.97it/s]

finished frames 7321800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1220428/1666666 [1:57:43<34:59, 212.55it/s]

finished frames 7322400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1220538/1666666 [1:57:43<35:15, 210.84it/s]

finished frames 7323000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1220624/1666666 [1:57:44<35:45, 207.94it/s]

finished frames 7323600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1220729/1666666 [1:57:44<35:39, 208.40it/s]

finished frames 7324200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1220834/1666666 [1:57:45<36:08, 205.64it/s]

finished frames 7324800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1220940/1666666 [1:57:45<35:45, 207.77it/s]

finished frames 7325400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1221024/1666666 [1:57:45<36:23, 204.10it/s]

finished frames 7326000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1221129/1666666 [1:57:46<35:52, 206.99it/s]

finished frames 7326600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1221234/1666666 [1:57:46<35:52, 206.93it/s]

finished frames 7327200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1221339/1666666 [1:57:47<35:58, 206.33it/s]

finished frames 7327800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1221423/1666666 [1:57:47<35:56, 206.45it/s]

finished frames 7328400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1221528/1666666 [1:57:48<35:57, 206.37it/s]

finished frames 7329000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1221633/1666666 [1:57:48<35:49, 207.04it/s]

finished frames 7329600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1221738/1666666 [1:57:49<35:51, 206.83it/s]

finished frames 7330200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1221822/1666666 [1:57:49<35:56, 206.32it/s]

finished frames 7330800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1221927/1666666 [1:57:50<35:59, 205.96it/s]

finished frames 7331400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1222032/1666666 [1:57:50<38:45, 191.17it/s]

finished frames 7332000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1222116/1666666 [1:57:51<39:41, 186.66it/s]

finished frames 7332600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1222243/1666666 [1:57:51<36:04, 205.37it/s]

finished frames 7333200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1222328/1666666 [1:57:52<35:45, 207.08it/s]

finished frames 7333800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1222433/1666666 [1:57:52<35:33, 208.22it/s]

finished frames 7334400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1222539/1666666 [1:57:53<35:26, 208.87it/s]

finished frames 7335000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1222623/1666666 [1:57:53<36:12, 204.36it/s]

finished frames 7335600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1222732/1666666 [1:57:54<34:31, 214.26it/s]

finished frames 7336200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1222842/1666666 [1:57:54<34:20, 215.41it/s]

finished frames 7336800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1222930/1666666 [1:57:55<34:47, 212.57it/s]

finished frames 7337400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1223037/1666666 [1:57:55<36:02, 205.18it/s]

finished frames 7338000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1223125/1666666 [1:57:56<34:43, 212.89it/s]

finished frames 7338600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1223235/1666666 [1:57:56<33:57, 217.64it/s]

finished frames 7339200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1223323/1666666 [1:57:57<33:50, 218.31it/s]

finished frames 7339800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1223433/1666666 [1:57:57<34:00, 217.17it/s]

finished frames 7340400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1223543/1666666 [1:57:58<34:27, 214.34it/s]

finished frames 7341000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1223631/1666666 [1:57:58<34:03, 216.81it/s]

finished frames 7341600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1223741/1666666 [1:57:58<34:59, 211.00it/s]

finished frames 7342200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1223829/1666666 [1:57:59<34:41, 212.78it/s]

finished frames 7342800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1223939/1666666 [1:57:59<34:21, 214.75it/s]

finished frames 7343400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1224027/1666666 [1:58:00<35:23, 208.47it/s]

finished frames 7344000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1224137/1666666 [1:58:00<34:48, 211.86it/s]

finished frames 7344600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1224225/1666666 [1:58:01<34:54, 211.27it/s]

finished frames 7345200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1224335/1666666 [1:58:01<35:06, 210.00it/s]

finished frames 7345800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1224441/1666666 [1:58:02<36:02, 204.50it/s]

finished frames 7346400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1224526/1666666 [1:58:02<35:24, 208.08it/s]

finished frames 7347000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1224631/1666666 [1:58:03<35:17, 208.80it/s]

finished frames 7347600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1224738/1666666 [1:58:03<35:09, 209.49it/s]

finished frames 7348200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1224822/1666666 [1:58:04<35:28, 207.54it/s]

finished frames 7348800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 73%|███████▎  | 1224927/1666666 [1:58:04<35:31, 207.26it/s]

finished frames 7349400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▎  | 1225032/1666666 [1:58:05<36:08, 203.70it/s]

finished frames 7350000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▎  | 1225138/1666666 [1:58:05<35:20, 208.23it/s]

finished frames 7350600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▎  | 1225222/1666666 [1:58:06<35:25, 207.66it/s]

finished frames 7351200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▎  | 1225329/1666666 [1:58:06<35:03, 209.78it/s]

finished frames 7351800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▎  | 1225436/1666666 [1:58:07<35:00, 210.01it/s]

finished frames 7352400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▎  | 1225522/1666666 [1:58:07<35:00, 209.99it/s]

finished frames 7353000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▎  | 1225630/1666666 [1:58:08<35:06, 209.35it/s]

finished frames 7353600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▎  | 1225735/1666666 [1:58:08<35:21, 207.85it/s]

finished frames 7354200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▎  | 1225841/1666666 [1:58:09<35:19, 207.95it/s]

finished frames 7354800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▎  | 1225925/1666666 [1:58:09<35:20, 207.87it/s]

finished frames 7355400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▎  | 1226030/1666666 [1:58:09<36:03, 203.64it/s]

finished frames 7356000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▎  | 1226137/1666666 [1:58:10<35:15, 208.25it/s]

finished frames 7356600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▎  | 1226243/1666666 [1:58:10<35:08, 208.88it/s]

finished frames 7357200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▎  | 1226327/1666666 [1:58:11<35:18, 207.82it/s]

finished frames 7357800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▎  | 1226432/1666666 [1:58:11<35:19, 207.69it/s]

finished frames 7358400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▎  | 1226537/1666666 [1:58:12<35:15, 208.01it/s]

finished frames 7359000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▎  | 1226642/1666666 [1:58:12<35:12, 208.28it/s]

finished frames 7359600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▎  | 1226726/1666666 [1:58:13<38:26, 190.71it/s]

finished frames 7360200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▎  | 1226834/1666666 [1:58:13<38:45, 189.15it/s]

finished frames 7360800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▎  | 1226939/1666666 [1:58:14<35:41, 205.31it/s]

finished frames 7361400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▎  | 1227023/1666666 [1:58:14<36:10, 202.57it/s]

finished frames 7362000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▎  | 1227128/1666666 [1:58:15<35:25, 206.82it/s]

finished frames 7362600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▎  | 1227233/1666666 [1:58:15<35:22, 207.04it/s]

finished frames 7363200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▎  | 1227338/1666666 [1:58:16<35:23, 206.88it/s]

finished frames 7363800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▎  | 1227422/1666666 [1:58:16<35:25, 206.69it/s]

finished frames 7364400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▎  | 1227527/1666666 [1:58:17<35:25, 206.56it/s]

finished frames 7365000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▎  | 1227632/1666666 [1:58:17<35:17, 207.36it/s]

finished frames 7365600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▎  | 1227737/1666666 [1:58:18<35:22, 206.80it/s]

finished frames 7366200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▎  | 1227842/1666666 [1:58:18<35:21, 206.85it/s]

finished frames 7366800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▎  | 1227926/1666666 [1:58:19<35:24, 206.54it/s]

finished frames 7367400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▎  | 1228031/1666666 [1:58:19<36:09, 202.22it/s]

finished frames 7368000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▎  | 1228136/1666666 [1:58:20<35:30, 205.80it/s]

finished frames 7368600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▎  | 1228241/1666666 [1:58:20<35:22, 206.54it/s]

finished frames 7369200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▎  | 1228325/1666666 [1:58:21<35:16, 207.13it/s]

finished frames 7369800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▎  | 1228430/1666666 [1:58:21<35:06, 208.08it/s]

finished frames 7370400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▎  | 1228535/1666666 [1:58:22<35:13, 207.34it/s]

finished frames 7371000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▎  | 1228640/1666666 [1:58:22<35:06, 207.93it/s]

finished frames 7371600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▎  | 1228724/1666666 [1:58:23<35:03, 208.18it/s]

finished frames 7372200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▎  | 1228831/1666666 [1:58:23<34:37, 210.76it/s]

finished frames 7372800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▎  | 1228941/1666666 [1:58:24<34:24, 212.05it/s]

finished frames 7373400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▎  | 1229029/1666666 [1:58:24<35:02, 208.14it/s]

finished frames 7374000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▎  | 1229138/1666666 [1:58:25<35:08, 207.55it/s]

finished frames 7374600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1229225/1666666 [1:58:25<36:40, 198.83it/s]

finished frames 7375200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1229335/1666666 [1:58:26<34:43, 209.89it/s]

finished frames 7375800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1229444/1666666 [1:58:26<34:24, 211.77it/s]

finished frames 7376400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1229532/1666666 [1:58:26<34:22, 211.91it/s]

finished frames 7377000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1229642/1666666 [1:58:27<34:10, 213.14it/s]

finished frames 7377600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1229730/1666666 [1:58:27<34:20, 212.07it/s]

finished frames 7378200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1229840/1666666 [1:58:28<34:17, 212.34it/s]

finished frames 7378800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1229928/1666666 [1:58:28<34:13, 212.64it/s]

finished frames 7379400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1230037/1666666 [1:58:29<35:31, 204.85it/s]

finished frames 7380000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1230142/1666666 [1:58:29<35:06, 207.21it/s]

finished frames 7380600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1230226/1666666 [1:58:30<35:10, 206.81it/s]

finished frames 7381200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1230331/1666666 [1:58:30<35:04, 207.29it/s]

finished frames 7381800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1230436/1666666 [1:58:31<35:03, 207.42it/s]

finished frames 7382400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1230541/1666666 [1:58:31<35:03, 207.34it/s]

finished frames 7383000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1230625/1666666 [1:58:32<35:04, 207.17it/s]

finished frames 7383600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1230730/1666666 [1:58:32<34:59, 207.60it/s]

finished frames 7384200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1230835/1666666 [1:58:33<35:01, 207.38it/s]

finished frames 7384800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1230940/1666666 [1:58:33<34:57, 207.76it/s]

finished frames 7385400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1231024/1666666 [1:58:34<35:42, 203.37it/s]

finished frames 7386000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1231129/1666666 [1:58:34<35:05, 206.89it/s]

finished frames 7386600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1231234/1666666 [1:58:35<34:56, 207.70it/s]

finished frames 7387200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1231339/1666666 [1:58:35<34:55, 207.72it/s]

finished frames 7387800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1231424/1666666 [1:58:36<36:21, 199.49it/s]

finished frames 7388400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1231529/1666666 [1:58:36<37:20, 194.25it/s]

finished frames 7389000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1231634/1666666 [1:58:37<35:20, 205.16it/s]

finished frames 7389600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1231739/1666666 [1:58:37<34:54, 207.61it/s]

finished frames 7390200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1231823/1666666 [1:58:38<34:55, 207.47it/s]

finished frames 7390800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1231929/1666666 [1:58:38<34:41, 208.81it/s]

finished frames 7391400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1232034/1666666 [1:58:39<35:23, 204.65it/s]

finished frames 7392000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1232140/1666666 [1:58:39<34:48, 208.02it/s]

finished frames 7392600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1232225/1666666 [1:58:39<34:45, 208.30it/s]

finished frames 7393200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1232330/1666666 [1:58:40<34:38, 208.92it/s]

finished frames 7393800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1232435/1666666 [1:58:40<34:43, 208.42it/s]

finished frames 7394400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1232540/1666666 [1:58:41<34:46, 208.08it/s]

finished frames 7395000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1232624/1666666 [1:58:41<34:47, 207.91it/s]

finished frames 7395600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1232729/1666666 [1:58:42<34:54, 207.22it/s]

finished frames 7396200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1232834/1666666 [1:58:42<34:44, 208.15it/s]

finished frames 7396800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1232940/1666666 [1:58:43<34:40, 208.45it/s]

finished frames 7397400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1233024/1666666 [1:58:43<35:35, 203.10it/s]

finished frames 7398000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1233129/1666666 [1:58:44<34:54, 207.04it/s]

finished frames 7398600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1233234/1666666 [1:58:44<34:44, 207.92it/s]

finished frames 7399200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1233339/1666666 [1:58:45<34:40, 208.26it/s]

finished frames 7399800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1233423/1666666 [1:58:45<34:43, 207.90it/s]

finished frames 7400400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1233528/1666666 [1:58:46<34:52, 206.96it/s]

finished frames 7401000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1233633/1666666 [1:58:46<34:42, 207.98it/s]

finished frames 7401600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1233717/1666666 [1:58:47<37:52, 190.48it/s]

finished frames 7402200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1233822/1666666 [1:58:47<38:14, 188.67it/s]

finished frames 7402800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1233927/1666666 [1:58:48<35:11, 204.92it/s]

finished frames 7403400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1234032/1666666 [1:58:48<35:29, 203.14it/s]

finished frames 7404000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1234137/1666666 [1:58:49<34:45, 207.40it/s]

finished frames 7404600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1234242/1666666 [1:58:49<34:41, 207.77it/s]

finished frames 7405200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1234326/1666666 [1:58:50<34:41, 207.74it/s]

finished frames 7405800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1234431/1666666 [1:58:50<34:36, 208.20it/s]

finished frames 7406400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1234536/1666666 [1:58:51<34:31, 208.57it/s]

finished frames 7407000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1234642/1666666 [1:58:51<34:27, 208.96it/s]

finished frames 7407600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1234727/1666666 [1:58:52<34:32, 208.42it/s]

finished frames 7408200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1234832/1666666 [1:58:52<34:31, 208.48it/s]

finished frames 7408800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1234938/1666666 [1:58:53<34:28, 208.75it/s]

finished frames 7409400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1235022/1666666 [1:58:53<35:31, 202.47it/s]

finished frames 7410000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1235128/1666666 [1:58:54<34:35, 207.96it/s]

finished frames 7410600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1235233/1666666 [1:58:54<34:33, 208.08it/s]

finished frames 7411200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1235339/1666666 [1:58:55<34:27, 208.64it/s]

finished frames 7411800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1235423/1666666 [1:58:55<34:41, 207.20it/s]

finished frames 7412400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1235528/1666666 [1:58:55<34:36, 207.61it/s]

finished frames 7413000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1235634/1666666 [1:58:56<34:29, 208.29it/s]

finished frames 7413600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1235739/1666666 [1:58:56<34:26, 208.56it/s]

finished frames 7414200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1235824/1666666 [1:58:57<34:29, 208.15it/s]

finished frames 7414800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1235930/1666666 [1:58:57<34:19, 209.19it/s]

finished frames 7415400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1236014/1666666 [1:58:58<35:29, 202.26it/s]

finished frames 7416000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1236140/1666666 [1:58:58<35:09, 204.11it/s]

finished frames 7416600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1236224/1666666 [1:58:59<35:44, 200.71it/s]

finished frames 7417200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1236331/1666666 [1:58:59<34:25, 208.36it/s]

finished frames 7417800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1236436/1666666 [1:59:00<34:30, 207.77it/s]

finished frames 7418400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1236542/1666666 [1:59:00<34:19, 208.81it/s]

finished frames 7419000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1236626/1666666 [1:59:01<34:29, 207.76it/s]

finished frames 7419600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1236732/1666666 [1:59:01<34:16, 209.11it/s]

finished frames 7420200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1236837/1666666 [1:59:02<34:22, 208.36it/s]

finished frames 7420800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1236942/1666666 [1:59:02<34:20, 208.60it/s]

finished frames 7421400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1237026/1666666 [1:59:03<35:10, 203.53it/s]

finished frames 7422000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1237132/1666666 [1:59:03<34:26, 207.81it/s]

finished frames 7422600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1237237/1666666 [1:59:04<34:17, 208.73it/s]

finished frames 7423200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1237342/1666666 [1:59:04<34:19, 208.42it/s]

finished frames 7423800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1237426/1666666 [1:59:05<34:21, 208.22it/s]

finished frames 7424400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1237531/1666666 [1:59:05<34:20, 208.31it/s]

finished frames 7425000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1237636/1666666 [1:59:06<34:17, 208.50it/s]

finished frames 7425600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1237741/1666666 [1:59:06<34:18, 208.38it/s]

finished frames 7426200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1237826/1666666 [1:59:07<34:22, 207.88it/s]

finished frames 7426800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1237931/1666666 [1:59:07<34:19, 208.21it/s]

finished frames 7427400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1238036/1666666 [1:59:08<35:03, 203.76it/s]

finished frames 7428000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1238141/1666666 [1:59:08<34:22, 207.78it/s]

finished frames 7428600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1238226/1666666 [1:59:09<34:20, 207.97it/s]

finished frames 7429200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1238331/1666666 [1:59:09<34:16, 208.31it/s]

finished frames 7429800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1238436/1666666 [1:59:10<35:29, 201.08it/s]

finished frames 7430400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1238541/1666666 [1:59:10<36:10, 197.29it/s]

finished frames 7431000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1238625/1666666 [1:59:10<34:55, 204.27it/s]

finished frames 7431600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1238730/1666666 [1:59:11<34:32, 206.47it/s]

finished frames 7432200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1238835/1666666 [1:59:12<34:27, 206.94it/s]

finished frames 7432800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1238940/1666666 [1:59:12<34:23, 207.24it/s]

finished frames 7433400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1239024/1666666 [1:59:12<35:16, 202.08it/s]

finished frames 7434000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1239129/1666666 [1:59:13<34:32, 206.27it/s]

finished frames 7434600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1239234/1666666 [1:59:13<34:22, 207.23it/s]

finished frames 7435200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1239339/1666666 [1:59:14<34:23, 207.06it/s]

finished frames 7435800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1239423/1666666 [1:59:14<34:28, 206.59it/s]

finished frames 7436400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1239528/1666666 [1:59:15<34:14, 207.91it/s]

finished frames 7437000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1239633/1666666 [1:59:15<34:20, 207.25it/s]

finished frames 7437600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1239738/1666666 [1:59:16<34:20, 207.17it/s]

finished frames 7438200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1239822/1666666 [1:59:16<34:26, 206.59it/s]

finished frames 7438800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1239927/1666666 [1:59:17<34:14, 207.75it/s]

finished frames 7439400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1240032/1666666 [1:59:17<35:00, 203.13it/s]

finished frames 7440000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1240137/1666666 [1:59:18<34:27, 206.26it/s]

finished frames 7440600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1240242/1666666 [1:59:18<34:17, 207.21it/s]

finished frames 7441200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1240326/1666666 [1:59:19<34:18, 207.14it/s]

finished frames 7441800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1240431/1666666 [1:59:19<34:13, 207.56it/s]

finished frames 7442400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1240536/1666666 [1:59:20<34:10, 207.84it/s]

finished frames 7443000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1240641/1666666 [1:59:20<34:09, 207.89it/s]

finished frames 7443600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1240725/1666666 [1:59:21<35:41, 198.94it/s]

finished frames 7444200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1240830/1666666 [1:59:21<36:34, 194.08it/s]

finished frames 7444800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1240935/1666666 [1:59:22<34:33, 205.28it/s]

finished frames 7445400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1241040/1666666 [1:59:22<35:04, 202.26it/s]

finished frames 7446000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1241124/1666666 [1:59:23<34:26, 205.88it/s]

finished frames 7446600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1241230/1666666 [1:59:23<34:00, 208.46it/s]

finished frames 7447200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1241336/1666666 [1:59:24<34:06, 207.84it/s]

finished frames 7447800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1241441/1666666 [1:59:24<34:12, 207.19it/s]

finished frames 7448400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1241525/1666666 [1:59:25<34:21, 206.24it/s]

finished frames 7449000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 74%|███████▍  | 1241630/1666666 [1:59:25<34:10, 207.24it/s]

finished frames 7449600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1241735/1666666 [1:59:26<34:08, 207.48it/s]

finished frames 7450200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1241840/1666666 [1:59:26<34:08, 207.40it/s]

finished frames 7450800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1241924/1666666 [1:59:27<34:15, 206.63it/s]

finished frames 7451400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1242029/1666666 [1:59:27<34:52, 202.97it/s]

finished frames 7452000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1242134/1666666 [1:59:28<34:13, 206.76it/s]

finished frames 7452600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1242239/1666666 [1:59:28<34:06, 207.35it/s]

finished frames 7453200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1242323/1666666 [1:59:28<34:10, 206.98it/s]

finished frames 7453800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1242428/1666666 [1:59:29<34:03, 207.65it/s]

finished frames 7454400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1242533/1666666 [1:59:29<34:00, 207.82it/s]

finished frames 7455000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1242638/1666666 [1:59:30<33:59, 207.86it/s]

finished frames 7455600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1242722/1666666 [1:59:30<34:10, 206.78it/s]

finished frames 7456200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1242827/1666666 [1:59:31<34:05, 207.21it/s]

finished frames 7456800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1242932/1666666 [1:59:31<34:07, 206.94it/s]

finished frames 7457400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1243035/1666666 [1:59:32<37:55, 186.20it/s]

finished frames 7458000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1243119/1666666 [1:59:32<37:54, 186.24it/s]

finished frames 7458600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1243225/1666666 [1:59:33<34:33, 204.17it/s]

finished frames 7459200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1243330/1666666 [1:59:33<33:59, 207.56it/s]

finished frames 7459800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1243436/1666666 [1:59:34<33:47, 208.76it/s]

finished frames 7460400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1243541/1666666 [1:59:34<33:44, 209.01it/s]

finished frames 7461000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1243625/1666666 [1:59:35<33:51, 208.25it/s]

finished frames 7461600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1243731/1666666 [1:59:35<33:46, 208.68it/s]

finished frames 7462200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1243837/1666666 [1:59:36<33:45, 208.78it/s]

finished frames 7462800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1243942/1666666 [1:59:36<33:51, 208.12it/s]

finished frames 7463400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1244026/1666666 [1:59:37<34:34, 203.78it/s]

finished frames 7464000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1244131/1666666 [1:59:37<34:05, 206.55it/s]

finished frames 7464600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1244236/1666666 [1:59:38<33:58, 207.19it/s]

finished frames 7465200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1244341/1666666 [1:59:38<33:58, 207.16it/s]

finished frames 7465800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1244425/1666666 [1:59:39<33:59, 206.98it/s]

finished frames 7466400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1244530/1666666 [1:59:39<33:56, 207.30it/s]

finished frames 7467000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1244635/1666666 [1:59:40<33:50, 207.82it/s]

finished frames 7467600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1244740/1666666 [1:59:40<33:52, 207.54it/s]

finished frames 7468200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1244825/1666666 [1:59:41<33:48, 208.00it/s]

finished frames 7468800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1244931/1666666 [1:59:41<33:39, 208.88it/s]

finished frames 7469400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1245040/1666666 [1:59:42<34:23, 204.35it/s]

finished frames 7470000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1245125/1666666 [1:59:42<33:55, 207.05it/s]

finished frames 7470600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1245232/1666666 [1:59:43<33:34, 209.18it/s]

finished frames 7471200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1245338/1666666 [1:59:43<33:33, 209.24it/s]

finished frames 7471800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1245423/1666666 [1:59:44<33:35, 208.98it/s]

finished frames 7472400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1245532/1666666 [1:59:44<33:18, 210.72it/s]

finished frames 7473000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1245641/1666666 [1:59:45<33:23, 210.15it/s]

finished frames 7473600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1245727/1666666 [1:59:45<33:31, 209.25it/s]

finished frames 7474200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1245833/1666666 [1:59:45<33:29, 209.38it/s]

finished frames 7474800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1245940/1666666 [1:59:46<33:30, 209.31it/s]

finished frames 7475400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1246024/1666666 [1:59:46<34:17, 204.47it/s]

finished frames 7476000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1246130/1666666 [1:59:47<34:01, 205.98it/s]

finished frames 7476600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1246238/1666666 [1:59:47<33:21, 210.03it/s]

finished frames 7477200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1246326/1666666 [1:59:48<33:19, 210.23it/s]

finished frames 7477800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1246436/1666666 [1:59:48<33:13, 210.81it/s]

finished frames 7478400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1246524/1666666 [1:59:49<33:13, 210.78it/s]

finished frames 7479000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1246634/1666666 [1:59:49<33:10, 210.99it/s]

finished frames 7479600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1246722/1666666 [1:59:50<33:17, 210.21it/s]

finished frames 7480200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1246832/1666666 [1:59:50<33:03, 211.67it/s]

finished frames 7480800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1246942/1666666 [1:59:51<33:00, 211.97it/s]

finished frames 7481400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1247029/1666666 [1:59:51<34:38, 201.92it/s]

finished frames 7482000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1247139/1666666 [1:59:52<33:15, 210.27it/s]

finished frames 7482600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1247227/1666666 [1:59:52<33:17, 209.94it/s]

finished frames 7483200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1247337/1666666 [1:59:53<32:54, 212.38it/s]

finished frames 7483800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1247425/1666666 [1:59:53<32:54, 212.30it/s]

finished frames 7484400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1247536/1666666 [1:59:54<32:26, 215.33it/s]

finished frames 7485000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1247626/1666666 [1:59:54<32:59, 211.72it/s]

finished frames 7485600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1247734/1666666 [1:59:55<34:23, 203.01it/s]

finished frames 7486200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1247819/1666666 [1:59:55<36:30, 191.19it/s]

finished frames 7486800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1247928/1666666 [1:59:55<33:02, 211.25it/s]

finished frames 7487400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1248038/1666666 [1:59:56<32:47, 212.81it/s]

finished frames 7488000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1248129/1666666 [1:59:56<31:55, 218.52it/s]

finished frames 7488600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1248239/1666666 [1:59:57<32:00, 217.91it/s]

finished frames 7489200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1248329/1666666 [1:59:57<31:44, 219.71it/s]

finished frames 7489800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1248440/1666666 [1:59:58<32:05, 217.23it/s]

finished frames 7490400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1248528/1666666 [1:59:58<32:28, 214.65it/s]

finished frames 7491000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1248638/1666666 [1:59:59<32:48, 212.36it/s]

finished frames 7491600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1248728/1666666 [1:59:59<31:56, 218.08it/s]

finished frames 7492200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1248841/1666666 [2:00:00<31:54, 218.20it/s]

finished frames 7492800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1248929/1666666 [2:00:00<32:07, 216.76it/s]

finished frames 7493400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1249039/1666666 [2:00:01<32:58, 211.09it/s]

finished frames 7494000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1249127/1666666 [2:00:01<32:51, 211.75it/s]

finished frames 7494600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1249237/1666666 [2:00:02<32:57, 211.10it/s]

finished frames 7495200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1249325/1666666 [2:00:02<32:53, 211.43it/s]

finished frames 7495800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1249433/1666666 [2:00:02<33:05, 210.16it/s]

finished frames 7496400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1249521/1666666 [2:00:03<32:38, 212.97it/s]

finished frames 7497000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1249631/1666666 [2:00:03<32:50, 211.66it/s]

finished frames 7497600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1249741/1666666 [2:00:04<32:53, 211.29it/s]

finished frames 7498200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1249829/1666666 [2:00:04<32:53, 211.22it/s]

finished frames 7498800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▍  | 1249939/1666666 [2:00:05<32:55, 210.93it/s]

finished frames 7499400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1250026/1666666 [2:00:05<33:51, 205.10it/s]

finished frames 7500000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1250133/1666666 [2:00:06<34:28, 201.39it/s]

finished frames 7500600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1250242/1666666 [2:00:06<32:54, 210.89it/s]

finished frames 7501200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1250330/1666666 [2:00:07<32:53, 210.98it/s]

finished frames 7501800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1250440/1666666 [2:00:07<32:47, 211.52it/s]

finished frames 7502400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1250528/1666666 [2:00:08<32:48, 211.35it/s]

finished frames 7503000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1250636/1666666 [2:00:08<33:17, 208.24it/s]

finished frames 7503600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1250741/1666666 [2:00:09<33:27, 207.14it/s]

finished frames 7504200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1250825/1666666 [2:00:09<33:37, 206.11it/s]

finished frames 7504800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1250930/1666666 [2:00:10<33:30, 206.78it/s]

finished frames 7505400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1251035/1666666 [2:00:10<34:06, 203.06it/s]

finished frames 7506000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1251140/1666666 [2:00:11<33:34, 206.25it/s]

finished frames 7506600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1251224/1666666 [2:00:11<33:36, 206.01it/s]

finished frames 7507200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1251329/1666666 [2:00:12<33:36, 205.99it/s]

finished frames 7507800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1251434/1666666 [2:00:12<33:26, 206.93it/s]

finished frames 7508400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1251539/1666666 [2:00:13<33:18, 207.76it/s]

finished frames 7509000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1251623/1666666 [2:00:13<33:22, 207.29it/s]

finished frames 7509600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1251728/1666666 [2:00:14<33:22, 207.25it/s]

finished frames 7510200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1251833/1666666 [2:00:14<33:17, 207.63it/s]

finished frames 7510800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1251938/1666666 [2:00:15<33:17, 207.59it/s]

finished frames 7511400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1252022/1666666 [2:00:15<34:08, 202.41it/s]

finished frames 7512000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1252127/1666666 [2:00:15<33:26, 206.63it/s]

finished frames 7512600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1252232/1666666 [2:00:16<33:14, 207.74it/s]

finished frames 7513200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1252337/1666666 [2:00:16<33:16, 207.52it/s]

finished frames 7513800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1252421/1666666 [2:00:17<39:14, 175.97it/s]

finished frames 7514400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1252526/1666666 [2:00:17<34:12, 201.79it/s]

finished frames 7515000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1252631/1666666 [2:00:18<33:30, 205.97it/s]

finished frames 7515600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1252736/1666666 [2:00:18<33:22, 206.73it/s]

finished frames 7516200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1252841/1666666 [2:00:19<33:10, 207.85it/s]

finished frames 7516800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1252925/1666666 [2:00:19<33:14, 207.42it/s]

finished frames 7517400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1253030/1666666 [2:00:20<34:07, 202.05it/s]

finished frames 7518000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1253135/1666666 [2:00:20<33:25, 206.19it/s]

finished frames 7518600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1253240/1666666 [2:00:21<33:21, 206.60it/s]

finished frames 7519200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1253324/1666666 [2:00:21<33:20, 206.59it/s]

finished frames 7519800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1253429/1666666 [2:00:22<33:24, 206.19it/s]

finished frames 7520400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1253534/1666666 [2:00:22<33:26, 205.85it/s]

finished frames 7521000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1253639/1666666 [2:00:23<33:21, 206.35it/s]

finished frames 7521600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1253723/1666666 [2:00:23<33:23, 206.06it/s]

finished frames 7522200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1253828/1666666 [2:00:24<33:12, 207.17it/s]

finished frames 7522800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1253933/1666666 [2:00:24<33:19, 206.41it/s]

finished frames 7523400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1254038/1666666 [2:00:25<34:01, 202.09it/s]

finished frames 7524000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1254122/1666666 [2:00:25<33:29, 205.31it/s]

finished frames 7524600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1254227/1666666 [2:00:26<33:25, 205.69it/s]

finished frames 7525200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1254332/1666666 [2:00:26<33:15, 206.62it/s]

finished frames 7525800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1254437/1666666 [2:00:27<33:28, 205.28it/s]

finished frames 7526400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1254542/1666666 [2:00:27<33:23, 205.71it/s]

finished frames 7527000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1254626/1666666 [2:00:28<33:26, 205.30it/s]

finished frames 7527600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1254731/1666666 [2:00:28<36:15, 189.32it/s]

finished frames 7528200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1254836/1666666 [2:00:29<33:48, 203.00it/s]

finished frames 7528800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1254941/1666666 [2:00:29<34:05, 201.26it/s]

finished frames 7529400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1255025/1666666 [2:00:30<34:19, 199.84it/s]

finished frames 7530000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1255130/1666666 [2:00:30<33:33, 204.35it/s]

finished frames 7530600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1255235/1666666 [2:00:31<33:13, 206.34it/s]

finished frames 7531200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1255340/1666666 [2:00:31<33:11, 206.51it/s]

finished frames 7531800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1255424/1666666 [2:00:32<33:17, 205.83it/s]

finished frames 7532400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1255529/1666666 [2:00:32<33:17, 205.86it/s]

finished frames 7533000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1255634/1666666 [2:00:33<33:00, 207.49it/s]

finished frames 7533600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1255741/1666666 [2:00:33<32:32, 210.50it/s]

finished frames 7534200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1255829/1666666 [2:00:34<32:10, 212.76it/s]

finished frames 7534800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1255939/1666666 [2:00:34<32:03, 213.55it/s]

finished frames 7535400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1256027/1666666 [2:00:34<32:59, 207.42it/s]

finished frames 7536000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1256136/1666666 [2:00:35<32:22, 211.30it/s]

finished frames 7536600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1256224/1666666 [2:00:35<32:22, 211.25it/s]

finished frames 7537200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1256334/1666666 [2:00:36<32:14, 212.13it/s]

finished frames 7537800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1256444/1666666 [2:00:36<32:05, 213.09it/s]

finished frames 7538400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1256532/1666666 [2:00:37<32:13, 212.13it/s]

finished frames 7539000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1256642/1666666 [2:00:37<32:04, 213.11it/s]

finished frames 7539600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1256730/1666666 [2:00:38<32:18, 211.48it/s]

finished frames 7540200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1256837/1666666 [2:00:38<32:51, 207.83it/s]

finished frames 7540800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1256942/1666666 [2:00:39<33:02, 206.71it/s]

finished frames 7541400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1257026/1666666 [2:00:39<33:42, 202.54it/s]

finished frames 7542000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1257131/1666666 [2:00:40<34:08, 199.88it/s]

finished frames 7542600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1257238/1666666 [2:00:40<34:20, 198.72it/s]

finished frames 7543200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1257325/1666666 [2:00:41<32:43, 208.49it/s]

finished frames 7543800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1257435/1666666 [2:00:41<32:11, 211.86it/s]

finished frames 7544400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1257523/1666666 [2:00:42<32:17, 211.21it/s]

finished frames 7545000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1257633/1666666 [2:00:42<32:13, 211.54it/s]

finished frames 7545600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1257743/1666666 [2:00:43<32:08, 212.01it/s]

finished frames 7546200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1257831/1666666 [2:00:43<32:06, 212.18it/s]

finished frames 7546800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1257941/1666666 [2:00:44<32:04, 212.39it/s]

finished frames 7547400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1258029/1666666 [2:00:44<32:48, 207.61it/s]

finished frames 7548000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1258137/1666666 [2:00:45<32:11, 211.54it/s]

finished frames 7548600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 75%|███████▌  | 1258225/1666666 [2:00:45<32:01, 212.61it/s]

finished frames 7549200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1258335/1666666 [2:00:46<31:54, 213.24it/s]

finished frames 7549800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1258423/1666666 [2:00:46<31:52, 213.50it/s]

finished frames 7550400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1258533/1666666 [2:00:46<31:52, 213.43it/s]

finished frames 7551000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1258643/1666666 [2:00:47<31:46, 213.99it/s]

finished frames 7551600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1258731/1666666 [2:00:47<31:45, 214.04it/s]

finished frames 7552200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1258841/1666666 [2:00:48<31:45, 214.03it/s]

finished frames 7552800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1258929/1666666 [2:00:48<31:41, 214.41it/s]

finished frames 7553400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1259038/1666666 [2:00:49<32:46, 207.31it/s]

finished frames 7554000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1259123/1666666 [2:00:49<32:48, 207.08it/s]

finished frames 7554600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1259229/1666666 [2:00:50<32:35, 208.34it/s]

finished frames 7555200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1259336/1666666 [2:00:50<32:27, 209.20it/s]

finished frames 7555800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1259420/1666666 [2:00:51<32:34, 208.33it/s]

finished frames 7556400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1259525/1666666 [2:00:51<33:13, 204.27it/s]

finished frames 7557000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1259630/1666666 [2:00:52<33:00, 205.54it/s]

finished frames 7557600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1259735/1666666 [2:00:52<32:45, 207.04it/s]

finished frames 7558200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1259840/1666666 [2:00:53<32:33, 208.24it/s]

finished frames 7558800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1259926/1666666 [2:00:53<32:09, 210.80it/s]

finished frames 7559400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1260036/1666666 [2:00:54<32:31, 208.32it/s]

finished frames 7560000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1260124/1666666 [2:00:54<31:58, 211.94it/s]

finished frames 7560600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1260234/1666666 [2:00:55<31:49, 212.90it/s]

finished frames 7561200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1260344/1666666 [2:00:55<31:49, 212.77it/s]

finished frames 7561800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1260432/1666666 [2:00:56<31:50, 212.63it/s]

finished frames 7562400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1260542/1666666 [2:00:56<31:42, 213.42it/s]

finished frames 7563000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1260630/1666666 [2:00:56<31:46, 213.01it/s]

finished frames 7563600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1260740/1666666 [2:00:57<31:39, 213.72it/s]

finished frames 7564200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1260828/1666666 [2:00:57<31:46, 212.88it/s]

finished frames 7564800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1260938/1666666 [2:00:58<31:37, 213.86it/s]

finished frames 7565400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1261026/1666666 [2:00:58<32:20, 209.05it/s]

finished frames 7566000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1261135/1666666 [2:00:59<31:52, 212.05it/s]

finished frames 7566600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1261223/1666666 [2:00:59<31:41, 213.26it/s]

finished frames 7567200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1261333/1666666 [2:01:00<31:26, 214.84it/s]

finished frames 7567800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1261443/1666666 [2:01:00<31:30, 214.36it/s]

finished frames 7568400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1261531/1666666 [2:01:01<31:32, 214.08it/s]

finished frames 7569000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1261641/1666666 [2:01:01<31:31, 214.10it/s]

finished frames 7569600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1261729/1666666 [2:01:02<31:35, 213.64it/s]

finished frames 7570200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1261817/1666666 [2:01:02<34:15, 196.96it/s]

finished frames 7570800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1261926/1666666 [2:01:03<32:04, 210.30it/s]

finished frames 7571400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1262036/1666666 [2:01:03<32:23, 208.16it/s]

finished frames 7572000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1262124/1666666 [2:01:03<31:47, 212.03it/s]

finished frames 7572600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1262234/1666666 [2:01:04<31:37, 213.19it/s]

finished frames 7573200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1262344/1666666 [2:01:05<31:28, 214.13it/s]

finished frames 7573800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1262432/1666666 [2:01:05<31:30, 213.87it/s]

finished frames 7574400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1262542/1666666 [2:01:05<31:28, 213.97it/s]

finished frames 7575000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1262630/1666666 [2:01:06<31:35, 213.13it/s]

finished frames 7575600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1262740/1666666 [2:01:06<31:30, 213.66it/s]

finished frames 7576200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1262828/1666666 [2:01:07<31:32, 213.35it/s]

finished frames 7576800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1262938/1666666 [2:01:07<31:28, 213.78it/s]

finished frames 7577400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1263026/1666666 [2:01:08<32:20, 208.01it/s]

finished frames 7578000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1263135/1666666 [2:01:08<31:51, 211.11it/s]

finished frames 7578600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1263223/1666666 [2:01:09<31:46, 211.62it/s]

finished frames 7579200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1263333/1666666 [2:01:09<31:20, 214.43it/s]

finished frames 7579800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1263443/1666666 [2:01:10<31:16, 214.87it/s]

finished frames 7580400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1263531/1666666 [2:01:10<31:31, 213.17it/s]

finished frames 7581000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1263641/1666666 [2:01:11<31:28, 213.43it/s]

finished frames 7581600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1263729/1666666 [2:01:11<31:40, 212.00it/s]

finished frames 7582200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1263839/1666666 [2:01:12<31:51, 210.75it/s]

finished frames 7582800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1263927/1666666 [2:01:12<31:54, 210.33it/s]

finished frames 7583400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1264035/1666666 [2:01:13<32:42, 205.12it/s]

finished frames 7584000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1264142/1666666 [2:01:13<32:06, 208.97it/s]

finished frames 7584600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1264227/1666666 [2:01:13<34:01, 197.16it/s]

finished frames 7585200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1264334/1666666 [2:01:14<35:12, 190.50it/s]

finished frames 7585800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1264442/1666666 [2:01:15<32:26, 206.61it/s]

finished frames 7586400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1264527/1666666 [2:01:15<32:08, 208.56it/s]

finished frames 7587000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1264636/1666666 [2:01:15<31:57, 209.67it/s]

finished frames 7587600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1264722/1666666 [2:01:16<32:00, 209.29it/s]

finished frames 7588200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1264830/1666666 [2:01:16<31:49, 210.48it/s]

finished frames 7588800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1264936/1666666 [2:01:17<32:14, 207.69it/s]

finished frames 7589400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1265022/1666666 [2:01:17<32:49, 203.94it/s]

finished frames 7590000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1265128/1666666 [2:01:18<32:10, 208.02it/s]

finished frames 7590600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1265235/1666666 [2:01:18<31:53, 209.74it/s]

finished frames 7591200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1265341/1666666 [2:01:19<31:55, 209.52it/s]

finished frames 7591800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1265427/1666666 [2:01:19<31:56, 209.41it/s]

finished frames 7592400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1265535/1666666 [2:01:20<31:53, 209.59it/s]

finished frames 7593000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1265642/1666666 [2:01:20<31:50, 209.85it/s]

finished frames 7593600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1265728/1666666 [2:01:21<31:53, 209.55it/s]

finished frames 7594200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1265836/1666666 [2:01:21<31:47, 210.18it/s]

finished frames 7594800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1265924/1666666 [2:01:22<31:43, 210.54it/s]

finished frames 7595400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1266034/1666666 [2:01:22<32:10, 207.51it/s]

finished frames 7596000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1266139/1666666 [2:01:23<31:58, 208.73it/s]

finished frames 7596600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1266225/1666666 [2:01:23<31:55, 209.06it/s]

finished frames 7597200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1266334/1666666 [2:01:24<31:47, 209.87it/s]

finished frames 7597800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1266422/1666666 [2:01:24<31:46, 209.89it/s]

finished frames 7598400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1266532/1666666 [2:01:25<31:31, 211.56it/s]

finished frames 7599000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1266641/1666666 [2:01:25<32:23, 205.78it/s]

finished frames 7599600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1266727/1666666 [2:01:26<33:03, 201.63it/s]

finished frames 7600200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1266833/1666666 [2:01:26<32:06, 207.50it/s]

finished frames 7600800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1266941/1666666 [2:01:27<31:40, 210.33it/s]

finished frames 7601400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1267026/1666666 [2:01:27<32:34, 204.42it/s]

finished frames 7602000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1267133/1666666 [2:01:27<31:52, 208.93it/s]

finished frames 7602600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1267241/1666666 [2:01:28<31:43, 209.87it/s]

finished frames 7603200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1267325/1666666 [2:01:28<32:10, 206.91it/s]

finished frames 7603800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1267430/1666666 [2:01:29<32:13, 206.49it/s]

finished frames 7604400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1267535/1666666 [2:01:29<32:10, 206.72it/s]

finished frames 7605000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1267640/1666666 [2:01:30<32:12, 206.44it/s]

finished frames 7605600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1267724/1666666 [2:01:30<32:12, 206.47it/s]

finished frames 7606200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1267829/1666666 [2:01:31<32:12, 206.34it/s]

finished frames 7606800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1267934/1666666 [2:01:31<32:04, 207.17it/s]

finished frames 7607400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1268039/1666666 [2:01:32<32:37, 203.68it/s]

finished frames 7608000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1268123/1666666 [2:01:32<32:17, 205.67it/s]

finished frames 7608600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1268228/1666666 [2:01:33<32:11, 206.29it/s]

finished frames 7609200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1268333/1666666 [2:01:33<32:05, 206.86it/s]

finished frames 7609800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1268438/1666666 [2:01:34<32:09, 206.44it/s]

finished frames 7610400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1268522/1666666 [2:01:34<32:14, 205.77it/s]

finished frames 7611000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1268627/1666666 [2:01:35<32:04, 206.81it/s]

finished frames 7611600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1268732/1666666 [2:01:35<32:12, 205.96it/s]

finished frames 7612200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1268837/1666666 [2:01:36<32:10, 206.05it/s]

finished frames 7612800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1268942/1666666 [2:01:36<33:04, 200.40it/s]

finished frames 7613400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1269024/1666666 [2:01:37<37:22, 177.33it/s]

finished frames 7614000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1269129/1666666 [2:01:37<32:54, 201.29it/s]

finished frames 7614600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1269234/1666666 [2:01:38<32:09, 205.94it/s]

finished frames 7615200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1269339/1666666 [2:01:38<32:02, 206.62it/s]

finished frames 7615800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1269423/1666666 [2:01:39<32:06, 206.16it/s]

finished frames 7616400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1269528/1666666 [2:01:39<31:52, 207.61it/s]

finished frames 7617000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1269633/1666666 [2:01:40<32:01, 206.60it/s]

finished frames 7617600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1269738/1666666 [2:01:40<32:02, 206.50it/s]

finished frames 7618200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1269822/1666666 [2:01:41<32:04, 206.18it/s]

finished frames 7618800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1269927/1666666 [2:01:41<31:55, 207.13it/s]

finished frames 7619400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1270032/1666666 [2:01:42<32:37, 202.67it/s]

finished frames 7620000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1270137/1666666 [2:01:42<32:03, 206.18it/s]

finished frames 7620600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1270242/1666666 [2:01:43<31:55, 206.95it/s]

finished frames 7621200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1270326/1666666 [2:01:43<31:59, 206.45it/s]

finished frames 7621800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1270431/1666666 [2:01:44<32:07, 205.58it/s]

finished frames 7622400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1270536/1666666 [2:01:44<32:06, 205.66it/s]

finished frames 7623000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1270641/1666666 [2:01:45<31:59, 206.33it/s]

finished frames 7623600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1270725/1666666 [2:01:45<31:52, 207.07it/s]

finished frames 7624200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▌  | 1270831/1666666 [2:01:46<32:02, 205.88it/s]

finished frames 7624800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▋  | 1270937/1666666 [2:01:46<31:32, 209.11it/s]

finished frames 7625400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▋  | 1271024/1666666 [2:01:46<31:56, 206.46it/s]

finished frames 7626000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▋  | 1271133/1666666 [2:01:47<31:08, 211.74it/s]

finished frames 7626600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▋  | 1271241/1666666 [2:01:48<32:13, 204.53it/s]

finished frames 7627200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▋  | 1271327/1666666 [2:01:48<31:24, 209.73it/s]

finished frames 7627800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▋  | 1271437/1666666 [2:01:48<31:00, 212.39it/s]

finished frames 7628400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▋  | 1271525/1666666 [2:01:49<30:54, 213.12it/s]

finished frames 7629000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▋  | 1271635/1666666 [2:01:49<31:00, 212.38it/s]

finished frames 7629600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▋  | 1271723/1666666 [2:01:50<30:56, 212.77it/s]

finished frames 7630200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▋  | 1271833/1666666 [2:01:50<30:51, 213.23it/s]

finished frames 7630800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▋  | 1271943/1666666 [2:01:51<30:46, 213.72it/s]

finished frames 7631400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▋  | 1272031/1666666 [2:01:51<31:27, 209.05it/s]

finished frames 7632000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▋  | 1272141/1666666 [2:01:52<30:55, 212.60it/s]

finished frames 7632600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▋  | 1272229/1666666 [2:01:52<30:48, 213.34it/s]

finished frames 7633200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▋  | 1272339/1666666 [2:01:53<30:45, 213.62it/s]

finished frames 7633800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▋  | 1272427/1666666 [2:01:53<31:10, 210.78it/s]

finished frames 7634400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▋  | 1272538/1666666 [2:01:54<30:40, 214.09it/s]

finished frames 7635000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▋  | 1272628/1666666 [2:01:54<30:45, 213.51it/s]

finished frames 7635600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▋  | 1272739/1666666 [2:01:55<30:35, 214.60it/s]

finished frames 7636200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▋  | 1272825/1666666 [2:01:55<31:28, 208.50it/s]

finished frames 7636800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▋  | 1272934/1666666 [2:01:55<30:59, 211.76it/s]

finished frames 7637400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▋  | 1273022/1666666 [2:01:56<31:44, 206.71it/s]

finished frames 7638000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▋  | 1273137/1666666 [2:01:56<29:59, 218.73it/s]

finished frames 7638600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▋  | 1273229/1666666 [2:01:57<29:49, 219.87it/s]

finished frames 7639200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▋  | 1273342/1666666 [2:01:57<29:42, 220.70it/s]

finished frames 7639800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▋  | 1273433/1666666 [2:01:58<30:28, 215.01it/s]

finished frames 7640400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▋  | 1273522/1666666 [2:01:58<30:24, 215.49it/s]

finished frames 7641000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▋  | 1273629/1666666 [2:01:59<33:02, 198.21it/s]

finished frames 7641600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▋  | 1273739/1666666 [2:01:59<30:32, 214.38it/s]

finished frames 7642200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▋  | 1273827/1666666 [2:02:00<30:02, 217.92it/s]

finished frames 7642800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▋  | 1273937/1666666 [2:02:00<30:01, 218.01it/s]

finished frames 7643400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▋  | 1274025/1666666 [2:02:01<30:51, 212.02it/s]

finished frames 7644000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▋  | 1274135/1666666 [2:02:01<30:49, 212.29it/s]

finished frames 7644600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▋  | 1274223/1666666 [2:02:02<30:42, 212.94it/s]

finished frames 7645200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▋  | 1274332/1666666 [2:02:02<31:21, 208.54it/s]

finished frames 7645800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▋  | 1274442/1666666 [2:02:03<30:36, 213.52it/s]

finished frames 7646400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▋  | 1274531/1666666 [2:02:03<30:28, 214.49it/s]

finished frames 7647000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▋  | 1274643/1666666 [2:02:03<30:04, 217.20it/s]

finished frames 7647600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▋  | 1274731/1666666 [2:02:04<30:34, 213.67it/s]

finished frames 7648200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▋  | 1274841/1666666 [2:02:04<30:23, 214.86it/s]

finished frames 7648800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 76%|███████▋  | 1274929/1666666 [2:02:05<30:39, 212.99it/s]

finished frames 7649400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1275039/1666666 [2:02:05<31:21, 208.16it/s]

finished frames 7650000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1275127/1666666 [2:02:06<30:51, 211.42it/s]

finished frames 7650600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1275237/1666666 [2:02:06<30:44, 212.27it/s]

finished frames 7651200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1275325/1666666 [2:02:07<30:44, 212.15it/s]

finished frames 7651800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1275435/1666666 [2:02:07<30:35, 213.10it/s]

finished frames 7652400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1275523/1666666 [2:02:08<30:28, 213.89it/s]

finished frames 7653000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1275633/1666666 [2:02:08<30:29, 213.69it/s]

finished frames 7653600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1275743/1666666 [2:02:09<30:25, 214.20it/s]

finished frames 7654200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1275831/1666666 [2:02:09<30:20, 214.72it/s]

finished frames 7654800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1275941/1666666 [2:02:10<30:26, 213.92it/s]

finished frames 7655400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1276027/1666666 [2:02:10<32:31, 200.22it/s]

finished frames 7656000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1276135/1666666 [2:02:11<33:08, 196.43it/s]

finished frames 7656600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1276243/1666666 [2:02:11<31:03, 209.46it/s]

finished frames 7657200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1276331/1666666 [2:02:11<30:37, 212.41it/s]

finished frames 7657800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1276441/1666666 [2:02:12<30:24, 213.88it/s]

finished frames 7658400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1276529/1666666 [2:02:12<30:25, 213.73it/s]

finished frames 7659000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1276639/1666666 [2:02:13<30:21, 214.17it/s]

finished frames 7659600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1276727/1666666 [2:02:13<30:31, 212.86it/s]

finished frames 7660200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1276837/1666666 [2:02:14<30:25, 213.53it/s]

finished frames 7660800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1276925/1666666 [2:02:14<30:28, 213.17it/s]

finished frames 7661400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1277035/1666666 [2:02:15<30:59, 209.54it/s]

finished frames 7662000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1277123/1666666 [2:02:15<30:33, 212.51it/s]

finished frames 7662600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1277233/1666666 [2:02:16<30:43, 211.30it/s]

finished frames 7663200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1277343/1666666 [2:02:16<30:50, 210.38it/s]

finished frames 7663800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1277428/1666666 [2:02:17<31:14, 207.70it/s]

finished frames 7664400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1277533/1666666 [2:02:17<31:16, 207.38it/s]

finished frames 7665000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1277638/1666666 [2:02:18<31:14, 207.51it/s]

finished frames 7665600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1277722/1666666 [2:02:18<31:19, 206.89it/s]

finished frames 7666200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1277827/1666666 [2:02:19<31:19, 206.91it/s]

finished frames 7666800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1277932/1666666 [2:02:19<31:11, 207.74it/s]

finished frames 7667400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1278037/1666666 [2:02:20<31:59, 202.44it/s]

finished frames 7668000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1278142/1666666 [2:02:20<31:26, 205.93it/s]

finished frames 7668600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1278226/1666666 [2:02:21<31:19, 206.65it/s]

finished frames 7669200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1278331/1666666 [2:02:21<33:59, 190.39it/s]

finished frames 7669800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1278437/1666666 [2:02:22<31:22, 206.19it/s]

finished frames 7670400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1278524/1666666 [2:02:22<30:48, 209.99it/s]

finished frames 7671000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1278634/1666666 [2:02:22<30:32, 211.76it/s]

finished frames 7671600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1278722/1666666 [2:02:23<30:51, 209.51it/s]

finished frames 7672200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1278832/1666666 [2:02:23<30:26, 212.37it/s]

finished frames 7672800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1278942/1666666 [2:02:24<30:18, 213.18it/s]

finished frames 7673400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1279030/1666666 [2:02:24<30:51, 209.42it/s]

finished frames 7674000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1279140/1666666 [2:02:25<30:11, 213.96it/s]

finished frames 7674600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1279228/1666666 [2:02:25<30:10, 214.03it/s]

finished frames 7675200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1279338/1666666 [2:02:26<30:13, 213.53it/s]

finished frames 7675800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1279426/1666666 [2:02:26<30:19, 212.86it/s]

finished frames 7676400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1279536/1666666 [2:02:27<30:13, 213.44it/s]

finished frames 7677000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1279624/1666666 [2:02:27<30:17, 212.91it/s]

finished frames 7677600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1279734/1666666 [2:02:28<30:00, 214.95it/s]

finished frames 7678200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1279844/1666666 [2:02:28<30:06, 214.17it/s]

finished frames 7678800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1279932/1666666 [2:02:29<30:13, 213.23it/s]

finished frames 7679400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1280020/1666666 [2:02:29<31:14, 206.27it/s]

finished frames 7680000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1280130/1666666 [2:02:30<30:22, 212.08it/s]

finished frames 7680600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1280240/1666666 [2:02:30<30:40, 209.98it/s]

finished frames 7681200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1280324/1666666 [2:02:30<31:01, 207.60it/s]

finished frames 7681800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1280429/1666666 [2:02:31<31:11, 206.41it/s]

finished frames 7682400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1280534/1666666 [2:02:31<31:13, 206.13it/s]

finished frames 7683000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1280640/1666666 [2:02:32<30:57, 207.82it/s]

finished frames 7683600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1280727/1666666 [2:02:32<32:17, 199.22it/s]

finished frames 7684200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1280837/1666666 [2:02:33<30:23, 211.60it/s]

finished frames 7684800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1280925/1666666 [2:02:33<30:20, 211.84it/s]

finished frames 7685400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1281035/1666666 [2:02:34<30:50, 208.41it/s]

finished frames 7686000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1281123/1666666 [2:02:34<30:13, 212.60it/s]

finished frames 7686600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1281233/1666666 [2:02:35<29:52, 215.07it/s]

finished frames 7687200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1281343/1666666 [2:02:35<29:55, 214.61it/s]

finished frames 7687800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1281431/1666666 [2:02:36<29:55, 214.57it/s]

finished frames 7688400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1281541/1666666 [2:02:36<29:55, 214.48it/s]

finished frames 7689000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1281629/1666666 [2:02:37<29:53, 214.71it/s]

finished frames 7689600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1281739/1666666 [2:02:37<29:52, 214.80it/s]

finished frames 7690200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1281827/1666666 [2:02:38<29:54, 214.49it/s]

finished frames 7690800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1281937/1666666 [2:02:38<29:50, 214.83it/s]

finished frames 7691400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1282025/1666666 [2:02:39<30:44, 208.55it/s]

finished frames 7692000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1282135/1666666 [2:02:39<30:08, 212.67it/s]

finished frames 7692600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1282223/1666666 [2:02:39<30:05, 212.99it/s]

finished frames 7693200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1282333/1666666 [2:02:40<29:50, 214.64it/s]

finished frames 7693800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1282443/1666666 [2:02:40<30:05, 212.85it/s]

finished frames 7694400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1282529/1666666 [2:02:41<30:45, 208.13it/s]

finished frames 7695000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1282634/1666666 [2:02:41<30:49, 207.68it/s]

finished frames 7695600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1282739/1666666 [2:02:42<30:54, 207.02it/s]

finished frames 7696200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1282823/1666666 [2:02:42<31:00, 206.31it/s]

finished frames 7696800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1282928/1666666 [2:02:43<30:58, 206.51it/s]

finished frames 7697400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1283034/1666666 [2:02:43<31:10, 205.08it/s]

finished frames 7698000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1283142/1666666 [2:02:44<31:34, 202.42it/s]

finished frames 7698600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1283229/1666666 [2:02:44<32:19, 197.73it/s]

finished frames 7699200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1283339/1666666 [2:02:45<30:14, 211.31it/s]

finished frames 7699800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1283427/1666666 [2:02:45<30:01, 212.70it/s]

finished frames 7700400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1283537/1666666 [2:02:46<29:57, 213.09it/s]

finished frames 7701000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1283625/1666666 [2:02:46<29:59, 212.89it/s]

finished frames 7701600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1283735/1666666 [2:02:47<30:01, 212.56it/s]

finished frames 7702200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1283823/1666666 [2:02:47<29:59, 212.73it/s]

finished frames 7702800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1283933/1666666 [2:02:48<29:41, 214.82it/s]

finished frames 7703400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1284021/1666666 [2:02:48<30:49, 206.89it/s]

finished frames 7704000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1284128/1666666 [2:02:49<30:27, 209.30it/s]

finished frames 7704600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1284233/1666666 [2:02:49<30:41, 207.68it/s]

finished frames 7705200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1284338/1666666 [2:02:50<30:45, 207.12it/s]

finished frames 7705800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1284422/1666666 [2:02:50<30:48, 206.75it/s]

finished frames 7706400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1284527/1666666 [2:02:50<30:38, 207.80it/s]

finished frames 7707000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1284632/1666666 [2:02:51<30:43, 207.20it/s]

finished frames 7707600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1284737/1666666 [2:02:52<30:43, 207.17it/s]

finished frames 7708200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1284842/1666666 [2:02:52<30:43, 207.14it/s]

finished frames 7708800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1284926/1666666 [2:02:52<30:44, 206.93it/s]

finished frames 7709400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1285031/1666666 [2:02:53<32:27, 195.98it/s]

finished frames 7710000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1285136/1666666 [2:02:53<31:09, 204.05it/s]

finished frames 7710600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1285241/1666666 [2:02:54<30:46, 206.60it/s]

finished frames 7711200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1285325/1666666 [2:02:54<30:39, 207.36it/s]

finished frames 7711800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1285430/1666666 [2:02:55<33:24, 190.20it/s]

finished frames 7712400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1285535/1666666 [2:02:55<30:57, 205.18it/s]

finished frames 7713000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1285640/1666666 [2:02:56<31:09, 203.76it/s]

finished frames 7713600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1285726/1666666 [2:02:56<30:34, 207.64it/s]

finished frames 7714200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1285832/1666666 [2:02:57<30:23, 208.82it/s]

finished frames 7714800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1285939/1666666 [2:02:57<30:17, 209.42it/s]

finished frames 7715400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1286023/1666666 [2:02:58<31:06, 203.98it/s]

finished frames 7716000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1286129/1666666 [2:02:58<30:25, 208.44it/s]

finished frames 7716600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1286236/1666666 [2:02:59<30:18, 209.25it/s]

finished frames 7717200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1286343/1666666 [2:02:59<30:20, 208.90it/s]

finished frames 7717800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1286428/1666666 [2:03:00<30:18, 209.04it/s]

finished frames 7718400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1286534/1666666 [2:03:00<30:15, 209.43it/s]

finished frames 7719000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1286641/1666666 [2:03:01<30:08, 210.17it/s]

finished frames 7719600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1286727/1666666 [2:03:01<30:10, 209.90it/s]

finished frames 7720200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1286834/1666666 [2:03:02<30:09, 209.90it/s]

finished frames 7720800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1286939/1666666 [2:03:02<30:17, 208.95it/s]

finished frames 7721400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1287023/1666666 [2:03:03<31:06, 203.43it/s]

finished frames 7722000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1287129/1666666 [2:03:03<30:28, 207.62it/s]

finished frames 7722600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1287237/1666666 [2:03:04<30:15, 209.03it/s]

finished frames 7723200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1287343/1666666 [2:03:04<30:15, 208.89it/s]

finished frames 7723800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1287428/1666666 [2:03:05<30:18, 208.58it/s]

finished frames 7724400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1287535/1666666 [2:03:05<29:57, 210.96it/s]

finished frames 7725000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1287623/1666666 [2:03:05<29:51, 211.54it/s]

finished frames 7725600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1287733/1666666 [2:03:06<29:45, 212.20it/s]

finished frames 7726200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1287843/1666666 [2:03:07<30:37, 206.13it/s]

finished frames 7726800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1287929/1666666 [2:03:07<31:16, 201.82it/s]

finished frames 7727400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1288037/1666666 [2:03:07<30:38, 205.92it/s]

finished frames 7728000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1288124/1666666 [2:03:08<29:53, 211.09it/s]

finished frames 7728600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1288234/1666666 [2:03:08<29:29, 213.84it/s]

finished frames 7729200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1288344/1666666 [2:03:09<29:33, 213.26it/s]

finished frames 7729800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1288432/1666666 [2:03:09<29:36, 212.90it/s]

finished frames 7730400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1288542/1666666 [2:03:10<29:32, 213.37it/s]

finished frames 7731000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1288630/1666666 [2:03:10<29:33, 213.13it/s]

finished frames 7731600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1288740/1666666 [2:03:11<29:30, 213.42it/s]

finished frames 7732200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1288828/1666666 [2:03:11<29:32, 213.14it/s]

finished frames 7732800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1288938/1666666 [2:03:12<29:34, 212.92it/s]

finished frames 7733400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1289026/1666666 [2:03:12<30:20, 207.48it/s]

finished frames 7734000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1289136/1666666 [2:03:13<29:38, 212.30it/s]

finished frames 7734600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1289224/1666666 [2:03:13<29:36, 212.46it/s]

finished frames 7735200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1289334/1666666 [2:03:14<29:30, 213.15it/s]

finished frames 7735800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1289444/1666666 [2:03:14<29:22, 214.03it/s]

finished frames 7736400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1289532/1666666 [2:03:15<29:23, 213.82it/s]

finished frames 7737000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1289642/1666666 [2:03:15<29:20, 214.18it/s]

finished frames 7737600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1289730/1666666 [2:03:15<29:29, 213.02it/s]

finished frames 7738200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1289840/1666666 [2:03:16<29:25, 213.42it/s]

finished frames 7738800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1289928/1666666 [2:03:16<29:30, 212.73it/s]

finished frames 7739400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1290038/1666666 [2:03:17<29:56, 209.60it/s]

finished frames 7740000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1290126/1666666 [2:03:17<29:26, 213.15it/s]

finished frames 7740600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1290235/1666666 [2:03:18<30:02, 208.89it/s]

finished frames 7741200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1290323/1666666 [2:03:18<30:49, 203.45it/s]

finished frames 7741800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1290433/1666666 [2:03:19<29:41, 211.25it/s]

finished frames 7742400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1290543/1666666 [2:03:19<29:27, 212.79it/s]

finished frames 7743000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1290631/1666666 [2:03:20<29:24, 213.08it/s]

finished frames 7743600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1290741/1666666 [2:03:20<29:17, 213.89it/s]

finished frames 7744200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1290829/1666666 [2:03:21<29:23, 213.15it/s]

finished frames 7744800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1290939/1666666 [2:03:21<29:21, 213.26it/s]

finished frames 7745400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1291027/1666666 [2:03:22<30:07, 207.81it/s]

finished frames 7746000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1291136/1666666 [2:03:22<29:38, 211.11it/s]

finished frames 7746600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1291224/1666666 [2:03:23<29:30, 212.09it/s]

finished frames 7747200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1291334/1666666 [2:03:23<29:18, 213.40it/s]

finished frames 7747800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1291422/1666666 [2:03:23<29:26, 212.42it/s]

finished frames 7748400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1291532/1666666 [2:03:24<29:32, 211.64it/s]

finished frames 7749000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 77%|███████▋  | 1291642/1666666 [2:03:25<29:24, 212.51it/s]

finished frames 7749600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1291730/1666666 [2:03:25<29:25, 212.34it/s]

finished frames 7750200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1291841/1666666 [2:03:25<29:09, 214.26it/s]

finished frames 7750800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1291929/1666666 [2:03:26<29:14, 213.56it/s]

finished frames 7751400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1292039/1666666 [2:03:26<29:55, 208.63it/s]

finished frames 7752000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1292127/1666666 [2:03:27<29:26, 211.97it/s]

finished frames 7752600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1292237/1666666 [2:03:27<29:18, 212.96it/s]

finished frames 7753200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1292325/1666666 [2:03:28<29:19, 212.79it/s]

finished frames 7753800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1292435/1666666 [2:03:28<29:11, 213.60it/s]

finished frames 7754400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1292523/1666666 [2:03:29<29:13, 213.37it/s]

finished frames 7755000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1292631/1666666 [2:03:29<30:09, 206.66it/s]

finished frames 7755600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1292738/1666666 [2:03:30<30:23, 205.01it/s]

finished frames 7756200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1292822/1666666 [2:03:30<29:57, 207.96it/s]

finished frames 7756800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1292932/1666666 [2:03:31<29:32, 210.81it/s]

finished frames 7757400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1293020/1666666 [2:03:31<30:27, 204.45it/s]

finished frames 7758000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1293130/1666666 [2:03:32<29:36, 210.28it/s]

finished frames 7758600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1293240/1666666 [2:03:32<29:39, 209.81it/s]

finished frames 7759200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1293325/1666666 [2:03:33<29:43, 209.33it/s]

finished frames 7759800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1293434/1666666 [2:03:33<29:29, 210.94it/s]

finished frames 7760400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1293542/1666666 [2:03:34<29:43, 209.16it/s]

finished frames 7761000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1293627/1666666 [2:03:34<29:43, 209.11it/s]

finished frames 7761600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1293735/1666666 [2:03:34<29:35, 210.05it/s]

finished frames 7762200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1293843/1666666 [2:03:35<29:30, 210.54it/s]

finished frames 7762800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1293930/1666666 [2:03:35<29:34, 210.07it/s]

finished frames 7763400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1294036/1666666 [2:03:36<30:22, 204.41it/s]

finished frames 7764000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1294143/1666666 [2:03:36<29:40, 209.26it/s]

finished frames 7764600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1294228/1666666 [2:03:37<29:43, 208.87it/s]

finished frames 7765200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1294336/1666666 [2:03:37<29:29, 210.39it/s]

finished frames 7765800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1294423/1666666 [2:03:38<29:36, 209.58it/s]

finished frames 7766400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1294530/1666666 [2:03:38<29:36, 209.46it/s]

finished frames 7767000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1294637/1666666 [2:03:39<29:32, 209.93it/s]

finished frames 7767600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1294722/1666666 [2:03:39<29:37, 209.21it/s]

finished frames 7768200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1294832/1666666 [2:03:40<29:24, 210.71it/s]

finished frames 7768800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1294940/1666666 [2:03:40<30:47, 201.16it/s]

finished frames 7769400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1295027/1666666 [2:03:41<31:57, 193.86it/s]

finished frames 7770000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1295134/1666666 [2:03:41<29:51, 207.34it/s]

finished frames 7770600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1295240/1666666 [2:03:42<29:32, 209.50it/s]

finished frames 7771200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1295325/1666666 [2:03:42<29:35, 209.15it/s]

finished frames 7771800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1295434/1666666 [2:03:43<29:24, 210.43it/s]

finished frames 7772400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1295522/1666666 [2:03:43<29:27, 209.96it/s]

finished frames 7773000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1295630/1666666 [2:03:44<29:31, 209.42it/s]

finished frames 7773600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1295735/1666666 [2:03:44<29:36, 208.83it/s]

finished frames 7774200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1295842/1666666 [2:03:45<29:27, 209.79it/s]

finished frames 7774800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1295927/1666666 [2:03:45<29:32, 209.19it/s]

finished frames 7775400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1296033/1666666 [2:03:46<30:07, 205.05it/s]

finished frames 7776000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1296139/1666666 [2:03:46<29:38, 208.34it/s]

finished frames 7776600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1296224/1666666 [2:03:46<29:35, 208.66it/s]

finished frames 7777200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1296332/1666666 [2:03:47<29:29, 209.34it/s]

finished frames 7777800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1296439/1666666 [2:03:47<29:23, 209.93it/s]

finished frames 7778400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1296523/1666666 [2:03:48<29:32, 208.83it/s]

finished frames 7779000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1296630/1666666 [2:03:48<29:27, 209.40it/s]

finished frames 7779600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1296739/1666666 [2:03:49<29:09, 211.47it/s]

finished frames 7780200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1296827/1666666 [2:03:49<29:04, 211.97it/s]

finished frames 7780800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1296937/1666666 [2:03:50<28:59, 212.52it/s]

finished frames 7781400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1297025/1666666 [2:03:50<29:35, 208.20it/s]

finished frames 7782000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1297135/1666666 [2:03:51<28:57, 212.73it/s]

finished frames 7782600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1297223/1666666 [2:03:51<28:53, 213.16it/s]

finished frames 7783200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1297333/1666666 [2:03:52<28:45, 214.06it/s]

finished frames 7783800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1297443/1666666 [2:03:52<28:53, 213.02it/s]

finished frames 7784400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1297531/1666666 [2:03:53<28:57, 212.50it/s]

finished frames 7785000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1297641/1666666 [2:03:53<29:01, 211.93it/s]

finished frames 7785600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1297728/1666666 [2:03:54<29:03, 211.60it/s]

finished frames 7786200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1297839/1666666 [2:03:54<28:41, 214.24it/s]

finished frames 7786800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1297929/1666666 [2:03:55<28:13, 217.73it/s]

finished frames 7787400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1298038/1666666 [2:03:55<29:42, 206.76it/s]

finished frames 7788000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1298126/1666666 [2:03:55<28:44, 213.73it/s]

finished frames 7788600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1298238/1666666 [2:03:56<28:04, 218.75it/s]

finished frames 7789200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1298329/1666666 [2:03:56<27:53, 220.09it/s]

finished frames 7789800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1298444/1666666 [2:03:57<27:54, 219.84it/s]

finished frames 7790400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1298532/1666666 [2:03:57<28:06, 218.23it/s]

finished frames 7791000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1298623/1666666 [2:03:58<27:54, 219.86it/s]

finished frames 7791600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1298734/1666666 [2:03:58<28:06, 218.13it/s]

finished frames 7792200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1298844/1666666 [2:03:59<28:33, 214.71it/s]

finished frames 7792800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1298933/1666666 [2:03:59<28:09, 217.62it/s]

finished frames 7793400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1299025/1666666 [2:04:00<28:08, 217.77it/s]

finished frames 7794000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1299140/1666666 [2:04:00<27:40, 221.32it/s]

finished frames 7794600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1299231/1666666 [2:04:01<27:50, 219.92it/s]

finished frames 7795200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1299345/1666666 [2:04:01<27:36, 221.80it/s]

finished frames 7795800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1299435/1666666 [2:04:01<28:25, 215.30it/s]

finished frames 7796400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1299523/1666666 [2:04:02<28:22, 215.61it/s]

finished frames 7797000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1299633/1666666 [2:04:02<28:49, 212.27it/s]

finished frames 7797600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1299743/1666666 [2:04:03<29:36, 206.51it/s]

finished frames 7798200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1299829/1666666 [2:04:03<31:07, 196.46it/s]

finished frames 7798800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1299939/1666666 [2:04:04<29:03, 210.31it/s]

finished frames 7799400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1300026/1666666 [2:04:04<29:27, 207.44it/s]

finished frames 7800000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1300136/1666666 [2:04:05<28:43, 212.72it/s]

finished frames 7800600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1300224/1666666 [2:04:05<28:38, 213.26it/s]

finished frames 7801200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1300334/1666666 [2:04:06<28:27, 214.50it/s]

finished frames 7801800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1300444/1666666 [2:04:06<28:32, 213.82it/s]

finished frames 7802400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1300532/1666666 [2:04:07<28:35, 213.40it/s]

finished frames 7803000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1300642/1666666 [2:04:07<28:32, 213.79it/s]

finished frames 7803600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1300730/1666666 [2:04:08<28:35, 213.30it/s]

finished frames 7804200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1300840/1666666 [2:04:08<28:27, 214.25it/s]

finished frames 7804800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1300928/1666666 [2:04:09<28:25, 214.41it/s]

finished frames 7805400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1301038/1666666 [2:04:09<29:00, 210.04it/s]

finished frames 7806000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1301126/1666666 [2:04:09<28:46, 211.70it/s]

finished frames 7806600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1301236/1666666 [2:04:10<28:43, 212.05it/s]

finished frames 7807200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1301324/1666666 [2:04:10<28:37, 212.76it/s]

finished frames 7807800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1301434/1666666 [2:04:11<28:24, 214.28it/s]

finished frames 7808400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1301544/1666666 [2:04:11<28:25, 214.04it/s]

finished frames 7809000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1301632/1666666 [2:04:12<28:29, 213.54it/s]

finished frames 7809600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1301742/1666666 [2:04:12<28:30, 213.40it/s]

finished frames 7810200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1301830/1666666 [2:04:13<28:31, 213.20it/s]

finished frames 7810800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1301940/1666666 [2:04:13<28:33, 212.83it/s]

finished frames 7811400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1302027/1666666 [2:04:14<29:19, 207.22it/s]

finished frames 7812000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1302134/1666666 [2:04:14<29:49, 203.75it/s]

finished frames 7812600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1302242/1666666 [2:04:15<30:22, 199.93it/s]

finished frames 7813200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1302329/1666666 [2:04:15<29:13, 207.80it/s]

finished frames 7813800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1302437/1666666 [2:04:16<28:54, 210.02it/s]

finished frames 7814400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1302523/1666666 [2:04:16<29:00, 209.21it/s]

finished frames 7815000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1302633/1666666 [2:04:17<28:39, 211.69it/s]

finished frames 7815600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1302743/1666666 [2:04:17<28:47, 210.64it/s]

finished frames 7816200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1302831/1666666 [2:04:18<28:52, 209.95it/s]

finished frames 7816800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1302941/1666666 [2:04:18<28:49, 210.36it/s]

finished frames 7817400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1303026/1666666 [2:04:19<29:45, 203.61it/s]

finished frames 7818000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1303134/1666666 [2:04:19<28:55, 209.43it/s]

finished frames 7818600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1303242/1666666 [2:04:20<28:46, 210.53it/s]

finished frames 7819200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1303330/1666666 [2:04:20<28:49, 210.06it/s]

finished frames 7819800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1303440/1666666 [2:04:21<28:42, 210.90it/s]

finished frames 7820400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1303528/1666666 [2:04:21<28:43, 210.72it/s]

finished frames 7821000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1303638/1666666 [2:04:21<28:38, 211.21it/s]

finished frames 7821600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1303726/1666666 [2:04:22<28:43, 210.64it/s]

finished frames 7822200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1303836/1666666 [2:04:22<28:52, 209.46it/s]

finished frames 7822800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1303923/1666666 [2:04:23<28:55, 209.03it/s]

finished frames 7823400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1304029/1666666 [2:04:23<29:29, 204.90it/s]

finished frames 7824000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1304136/1666666 [2:04:24<29:00, 208.34it/s]

finished frames 7824600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1304243/1666666 [2:04:24<28:52, 209.22it/s]

finished frames 7825200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1304329/1666666 [2:04:25<28:51, 209.27it/s]

finished frames 7825800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1304435/1666666 [2:04:25<30:33, 197.57it/s]

finished frames 7826400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1304540/1666666 [2:04:26<29:30, 204.52it/s]

finished frames 7827000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1304625/1666666 [2:04:26<28:59, 208.16it/s]

finished frames 7827600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1304734/1666666 [2:04:27<28:42, 210.06it/s]

finished frames 7828200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1304842/1666666 [2:04:27<28:38, 210.51it/s]

finished frames 7828800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1304930/1666666 [2:04:28<28:39, 210.32it/s]

finished frames 7829400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1305040/1666666 [2:04:28<29:06, 207.11it/s]

finished frames 7830000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1305125/1666666 [2:04:29<28:45, 209.49it/s]

finished frames 7830600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1305235/1666666 [2:04:29<28:34, 210.82it/s]

finished frames 7831200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1305323/1666666 [2:04:30<28:33, 210.85it/s]

finished frames 7831800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1305433/1666666 [2:04:30<28:30, 211.13it/s]

finished frames 7832400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1305543/1666666 [2:04:31<28:25, 211.75it/s]

finished frames 7833000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1305631/1666666 [2:04:31<28:29, 211.20it/s]

finished frames 7833600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1305741/1666666 [2:04:32<28:34, 210.54it/s]

finished frames 7834200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1305829/1666666 [2:04:32<28:14, 212.98it/s]

finished frames 7834800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1305939/1666666 [2:04:32<28:29, 210.95it/s]

finished frames 7835400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1306026/1666666 [2:04:33<29:13, 205.72it/s]

finished frames 7836000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1306134/1666666 [2:04:33<28:36, 210.06it/s]

finished frames 7836600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1306222/1666666 [2:04:34<28:31, 210.66it/s]

finished frames 7837200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1306332/1666666 [2:04:34<28:32, 210.43it/s]

finished frames 7837800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1306442/1666666 [2:04:35<28:28, 210.90it/s]

finished frames 7838400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1306530/1666666 [2:04:35<28:28, 210.74it/s]

finished frames 7839000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1306640/1666666 [2:04:36<28:23, 211.32it/s]

finished frames 7839600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1306728/1666666 [2:04:36<28:27, 210.83it/s]

finished frames 7840200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1306838/1666666 [2:04:37<30:00, 199.84it/s]

finished frames 7840800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1306924/1666666 [2:04:37<30:40, 195.45it/s]

finished frames 7841400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1307032/1666666 [2:04:38<29:24, 203.79it/s]

finished frames 7842000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1307141/1666666 [2:04:38<28:31, 210.05it/s]

finished frames 7842600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1307229/1666666 [2:04:39<28:28, 210.39it/s]

finished frames 7843200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1307339/1666666 [2:04:39<28:25, 210.71it/s]

finished frames 7843800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1307427/1666666 [2:04:40<28:24, 210.75it/s]

finished frames 7844400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1307537/1666666 [2:04:40<28:24, 210.73it/s]

finished frames 7845000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1307625/1666666 [2:04:41<28:21, 211.00it/s]

finished frames 7845600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1307735/1666666 [2:04:41<28:19, 211.18it/s]

finished frames 7846200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1307823/1666666 [2:04:41<28:27, 210.11it/s]

finished frames 7846800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1307933/1666666 [2:04:42<28:24, 210.41it/s]

finished frames 7847400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1308020/1666666 [2:04:42<29:13, 204.55it/s]

finished frames 7848000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1308128/1666666 [2:04:43<28:31, 209.50it/s]

finished frames 7848600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 78%|███████▊  | 1308236/1666666 [2:04:43<28:21, 210.63it/s]

finished frames 7849200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▊  | 1308343/1666666 [2:04:44<28:35, 208.85it/s]

finished frames 7849800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▊  | 1308428/1666666 [2:04:44<28:30, 209.43it/s]

finished frames 7850400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▊  | 1308536/1666666 [2:04:45<28:26, 209.87it/s]

finished frames 7851000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▊  | 1308622/1666666 [2:04:45<28:30, 209.35it/s]

finished frames 7851600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▊  | 1308729/1666666 [2:04:46<28:31, 209.13it/s]

finished frames 7852200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▊  | 1308836/1666666 [2:04:46<28:26, 209.74it/s]

finished frames 7852800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▊  | 1308941/1666666 [2:04:47<29:30, 202.00it/s]

finished frames 7853400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▊  | 1309025/1666666 [2:04:47<29:41, 200.70it/s]

finished frames 7854000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▊  | 1309131/1666666 [2:04:48<28:40, 207.86it/s]

finished frames 7854600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▊  | 1309237/1666666 [2:04:48<30:55, 192.67it/s]

finished frames 7855200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▊  | 1309323/1666666 [2:04:49<29:03, 205.01it/s]

finished frames 7855800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▊  | 1309430/1666666 [2:04:49<28:33, 208.44it/s]

finished frames 7856400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▊  | 1309538/1666666 [2:04:50<28:21, 209.88it/s]

finished frames 7857000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▊  | 1309623/1666666 [2:04:50<28:23, 209.62it/s]

finished frames 7857600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▊  | 1309730/1666666 [2:04:51<28:21, 209.76it/s]

finished frames 7858200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▊  | 1309837/1666666 [2:04:51<28:16, 210.35it/s]

finished frames 7858800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▊  | 1309925/1666666 [2:04:52<28:17, 210.12it/s]

finished frames 7859400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▊  | 1310031/1666666 [2:04:52<29:07, 204.09it/s]

finished frames 7860000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▊  | 1310136/1666666 [2:04:53<28:47, 206.41it/s]

finished frames 7860600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▊  | 1310241/1666666 [2:04:53<28:37, 207.58it/s]

finished frames 7861200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▊  | 1310325/1666666 [2:04:54<28:45, 206.52it/s]

finished frames 7861800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▊  | 1310430/1666666 [2:04:54<28:43, 206.75it/s]

finished frames 7862400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▊  | 1310535/1666666 [2:04:55<28:39, 207.16it/s]

finished frames 7863000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▊  | 1310640/1666666 [2:04:55<28:46, 206.27it/s]

finished frames 7863600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▊  | 1310724/1666666 [2:04:55<28:47, 206.06it/s]

finished frames 7864200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▊  | 1310829/1666666 [2:04:56<28:53, 205.27it/s]

finished frames 7864800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▊  | 1310934/1666666 [2:04:57<28:40, 206.73it/s]

finished frames 7865400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▊  | 1311039/1666666 [2:04:57<29:24, 201.58it/s]

finished frames 7866000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▊  | 1311123/1666666 [2:04:57<28:52, 205.25it/s]

finished frames 7866600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▊  | 1311228/1666666 [2:04:58<28:51, 205.31it/s]

finished frames 7867200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▊  | 1311333/1666666 [2:04:58<28:46, 205.87it/s]

finished frames 7867800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▊  | 1311438/1666666 [2:04:59<28:40, 206.45it/s]

finished frames 7868400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▊  | 1311522/1666666 [2:04:59<28:46, 205.70it/s]

finished frames 7869000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▊  | 1311627/1666666 [2:05:00<29:52, 198.05it/s]

finished frames 7869600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▊  | 1311733/1666666 [2:05:00<28:36, 206.73it/s]

finished frames 7870200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▊  | 1311838/1666666 [2:05:01<28:23, 208.26it/s]

finished frames 7870800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▊  | 1311923/1666666 [2:05:01<28:24, 208.17it/s]

finished frames 7871400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▊  | 1312030/1666666 [2:05:02<28:54, 204.44it/s]

finished frames 7872000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▊  | 1312135/1666666 [2:05:02<29:03, 203.40it/s]

finished frames 7872600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▊  | 1312243/1666666 [2:05:03<28:09, 209.78it/s]

finished frames 7873200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▊  | 1312330/1666666 [2:05:03<28:00, 210.89it/s]

finished frames 7873800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▊  | 1312440/1666666 [2:05:04<27:56, 211.34it/s]

finished frames 7874400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1312528/1666666 [2:05:04<27:51, 211.82it/s]

finished frames 7875000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1312638/1666666 [2:05:05<27:45, 212.56it/s]

finished frames 7875600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1312726/1666666 [2:05:05<27:48, 212.13it/s]

finished frames 7876200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1312836/1666666 [2:05:06<27:47, 212.15it/s]

finished frames 7876800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1312924/1666666 [2:05:06<27:48, 212.06it/s]

finished frames 7877400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1313034/1666666 [2:05:07<28:29, 206.90it/s]

finished frames 7878000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1313142/1666666 [2:05:07<27:46, 212.12it/s]

finished frames 7878600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1313230/1666666 [2:05:08<27:40, 212.87it/s]

finished frames 7879200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1313340/1666666 [2:05:08<27:50, 211.55it/s]

finished frames 7879800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1313428/1666666 [2:05:08<27:50, 211.48it/s]

finished frames 7880400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1313538/1666666 [2:05:09<27:47, 211.76it/s]

finished frames 7881000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1313626/1666666 [2:05:09<27:48, 211.60it/s]

finished frames 7881600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1313736/1666666 [2:05:10<27:43, 212.20it/s]

finished frames 7882200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1313824/1666666 [2:05:10<29:27, 199.68it/s]

finished frames 7882800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1313934/1666666 [2:05:11<30:13, 194.49it/s]

finished frames 7883400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1314020/1666666 [2:05:11<29:18, 200.55it/s]

finished frames 7884000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1314129/1666666 [2:05:12<28:01, 209.72it/s]

finished frames 7884600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1314239/1666666 [2:05:12<27:44, 211.77it/s]

finished frames 7885200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1314327/1666666 [2:05:13<27:45, 211.53it/s]

finished frames 7885800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1314437/1666666 [2:05:13<27:33, 213.07it/s]

finished frames 7886400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1314525/1666666 [2:05:14<27:35, 212.77it/s]

finished frames 7887000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1314635/1666666 [2:05:14<27:33, 212.90it/s]

finished frames 7887600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1314723/1666666 [2:05:15<27:37, 212.35it/s]

finished frames 7888200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1314833/1666666 [2:05:15<27:41, 211.79it/s]

finished frames 7888800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1314943/1666666 [2:05:16<27:30, 213.09it/s]

finished frames 7889400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1315031/1666666 [2:05:16<28:13, 207.64it/s]

finished frames 7890000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1315138/1666666 [2:05:17<28:07, 208.26it/s]

finished frames 7890600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1315222/1666666 [2:05:17<28:18, 206.89it/s]

finished frames 7891200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1315327/1666666 [2:05:18<28:19, 206.71it/s]

finished frames 7891800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1315432/1666666 [2:05:18<28:14, 207.34it/s]

finished frames 7892400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1315537/1666666 [2:05:19<28:14, 207.26it/s]

finished frames 7893000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1315642/1666666 [2:05:19<28:11, 207.58it/s]

finished frames 7893600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1315726/1666666 [2:05:19<28:15, 207.02it/s]

finished frames 7894200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1315831/1666666 [2:05:20<28:10, 207.49it/s]

finished frames 7894800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1315936/1666666 [2:05:20<28:08, 207.69it/s]

finished frames 7895400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1316020/1666666 [2:05:21<29:03, 201.16it/s]

finished frames 7896000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1316125/1666666 [2:05:21<28:13, 207.00it/s]

finished frames 7896600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1316230/1666666 [2:05:22<28:15, 206.66it/s]

finished frames 7897200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1316335/1666666 [2:05:22<29:24, 198.56it/s]

finished frames 7897800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1316440/1666666 [2:05:23<28:16, 206.42it/s]

finished frames 7898400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1316524/1666666 [2:05:23<28:08, 207.32it/s]

finished frames 7899000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1316630/1666666 [2:05:24<28:06, 207.50it/s]

finished frames 7899600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1316739/1666666 [2:05:24<27:31, 211.87it/s]

finished frames 7900200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1316827/1666666 [2:05:25<27:35, 211.36it/s]

finished frames 7900800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1316937/1666666 [2:05:25<27:31, 211.77it/s]

finished frames 7901400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1317024/1666666 [2:05:26<28:14, 206.28it/s]

finished frames 7902000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1317132/1666666 [2:05:26<27:42, 210.26it/s]

finished frames 7902600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1317242/1666666 [2:05:27<27:33, 211.27it/s]

finished frames 7903200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1317330/1666666 [2:05:27<27:38, 210.58it/s]

finished frames 7903800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1317440/1666666 [2:05:28<27:22, 212.59it/s]

finished frames 7904400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1317528/1666666 [2:05:28<27:29, 211.66it/s]

finished frames 7905000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1317638/1666666 [2:05:29<27:17, 213.19it/s]

finished frames 7905600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1317726/1666666 [2:05:29<27:12, 213.79it/s]

finished frames 7906200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1317836/1666666 [2:05:30<27:09, 214.03it/s]

finished frames 7906800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1317924/1666666 [2:05:30<27:07, 214.31it/s]

finished frames 7907400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1318034/1666666 [2:05:31<27:39, 210.13it/s]

finished frames 7908000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1318144/1666666 [2:05:31<26:57, 215.52it/s]

finished frames 7908600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1318232/1666666 [2:05:31<27:06, 214.16it/s]

finished frames 7909200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1318342/1666666 [2:05:32<27:04, 214.39it/s]

finished frames 7909800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1318430/1666666 [2:05:32<27:07, 214.00it/s]

finished frames 7910400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1318518/1666666 [2:05:33<27:06, 214.07it/s]

finished frames 7911000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1318628/1666666 [2:05:33<27:44, 209.05it/s]

finished frames 7911600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1318738/1666666 [2:05:34<27:20, 212.10it/s]

finished frames 7912200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1318826/1666666 [2:05:34<27:20, 212.05it/s]

finished frames 7912800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1318936/1666666 [2:05:35<27:17, 212.30it/s]

finished frames 7913400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1319024/1666666 [2:05:35<27:52, 207.86it/s]

finished frames 7914000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1319134/1666666 [2:05:36<27:21, 211.74it/s]

finished frames 7914600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1319244/1666666 [2:05:36<27:11, 212.98it/s]

finished frames 7915200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1319332/1666666 [2:05:37<27:17, 212.14it/s]

finished frames 7915800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1319442/1666666 [2:05:37<27:15, 212.30it/s]

finished frames 7916400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1319530/1666666 [2:05:38<27:15, 212.29it/s]

finished frames 7917000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1319640/1666666 [2:05:38<27:10, 212.84it/s]

finished frames 7917600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1319728/1666666 [2:05:39<27:11, 212.70it/s]

finished frames 7918200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1319838/1666666 [2:05:39<27:10, 212.72it/s]

finished frames 7918800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1319926/1666666 [2:05:39<27:08, 212.88it/s]

finished frames 7919400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1320036/1666666 [2:05:40<27:51, 207.43it/s]

finished frames 7920000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1320124/1666666 [2:05:40<27:14, 211.96it/s]

finished frames 7920600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1320234/1666666 [2:05:41<26:52, 214.78it/s]

finished frames 7921200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1320344/1666666 [2:05:41<26:56, 214.19it/s]

finished frames 7921800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1320432/1666666 [2:05:42<26:59, 213.78it/s]

finished frames 7922400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1320542/1666666 [2:05:42<26:58, 213.80it/s]

finished frames 7923000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1320630/1666666 [2:05:43<27:05, 212.82it/s]

finished frames 7923600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1320740/1666666 [2:05:43<27:03, 213.06it/s]

finished frames 7924200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1320828/1666666 [2:05:44<27:13, 211.70it/s]

finished frames 7924800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1320916/1666666 [2:05:44<27:22, 210.56it/s]

finished frames 7925400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1321024/1666666 [2:05:45<28:24, 202.77it/s]

finished frames 7926000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1321132/1666666 [2:05:45<27:55, 206.20it/s]

finished frames 7926600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1321241/1666666 [2:05:46<27:15, 211.18it/s]

finished frames 7927200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1321329/1666666 [2:05:46<27:09, 211.99it/s]

finished frames 7927800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1321439/1666666 [2:05:47<27:22, 210.20it/s]

finished frames 7928400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1321527/1666666 [2:05:47<27:13, 211.27it/s]

finished frames 7929000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1321637/1666666 [2:05:48<27:07, 212.06it/s]

finished frames 7929600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1321725/1666666 [2:05:48<27:07, 211.93it/s]

finished frames 7930200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1321835/1666666 [2:05:49<27:06, 212.02it/s]

finished frames 7930800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1321923/1666666 [2:05:49<27:09, 211.61it/s]

finished frames 7931400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1322033/1666666 [2:05:49<27:36, 208.10it/s]

finished frames 7932000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1322143/1666666 [2:05:50<27:04, 212.04it/s]

finished frames 7932600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1322231/1666666 [2:05:50<27:04, 212.08it/s]

finished frames 7933200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1322341/1666666 [2:05:51<27:02, 212.18it/s]

finished frames 7933800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1322429/1666666 [2:05:51<27:01, 212.34it/s]

finished frames 7934400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1322539/1666666 [2:05:52<27:04, 211.77it/s]

finished frames 7935000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1322627/1666666 [2:05:52<27:21, 209.59it/s]

finished frames 7935600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1322733/1666666 [2:05:53<27:27, 208.79it/s]

finished frames 7936200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1322839/1666666 [2:05:53<27:40, 207.04it/s]

finished frames 7936800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1322921/1666666 [2:05:54<29:19, 195.35it/s]

finished frames 7937400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1323022/1666666 [2:05:54<28:54, 198.17it/s]

finished frames 7938000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1323132/1666666 [2:05:55<27:02, 211.67it/s]

finished frames 7938600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1323242/1666666 [2:05:55<27:07, 211.02it/s]

finished frames 7939200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1323330/1666666 [2:05:56<27:45, 206.13it/s]

finished frames 7939800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1323442/1666666 [2:05:56<26:22, 216.92it/s]

finished frames 7940400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1323532/1666666 [2:05:57<26:09, 218.63it/s]

finished frames 7941000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1323642/1666666 [2:05:57<26:09, 218.50it/s]

finished frames 7941600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1323730/1666666 [2:05:58<26:11, 218.23it/s]

finished frames 7942200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1323844/1666666 [2:05:58<25:55, 220.37it/s]

finished frames 7942800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1323935/1666666 [2:05:58<25:56, 220.18it/s]

finished frames 7943400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1324024/1666666 [2:05:59<27:21, 208.77it/s]

finished frames 7944000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1324134/1666666 [2:05:59<26:47, 213.09it/s]

finished frames 7944600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1324226/1666666 [2:06:00<26:02, 219.19it/s]

finished frames 7945200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1324341/1666666 [2:06:00<25:48, 221.13it/s]

finished frames 7945800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1324430/1666666 [2:06:01<26:17, 216.94it/s]

finished frames 7946400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1324540/1666666 [2:06:01<26:30, 215.09it/s]

finished frames 7947000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1324628/1666666 [2:06:02<27:08, 209.97it/s]

finished frames 7947600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1324736/1666666 [2:06:02<26:58, 211.28it/s]

finished frames 7948200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1324824/1666666 [2:06:03<27:19, 208.45it/s]

finished frames 7948800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 79%|███████▉  | 1324933/1666666 [2:06:03<26:52, 211.87it/s]

finished frames 7949400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1325021/1666666 [2:06:04<27:38, 206.04it/s]

finished frames 7950000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1325129/1666666 [2:06:04<27:01, 210.64it/s]

finished frames 7950600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1325239/1666666 [2:06:05<26:50, 212.02it/s]

finished frames 7951200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1325327/1666666 [2:06:05<26:51, 211.82it/s]

finished frames 7951800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1325437/1666666 [2:06:06<26:53, 211.48it/s]

finished frames 7952400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1325525/1666666 [2:06:06<26:55, 211.17it/s]

finished frames 7953000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1325635/1666666 [2:06:06<26:55, 211.16it/s]

finished frames 7953600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1325723/1666666 [2:06:07<28:30, 199.27it/s]

finished frames 7954200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1325832/1666666 [2:06:07<28:53, 196.63it/s]

finished frames 7954800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1325941/1666666 [2:06:08<27:07, 209.34it/s]

finished frames 7955400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1326028/1666666 [2:06:08<27:32, 206.19it/s]

finished frames 7956000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1326137/1666666 [2:06:09<26:52, 211.21it/s]

finished frames 7956600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1326225/1666666 [2:06:09<26:48, 211.66it/s]

finished frames 7957200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1326335/1666666 [2:06:10<26:37, 213.05it/s]

finished frames 7957800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1326423/1666666 [2:06:10<26:39, 212.77it/s]

finished frames 7958400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1326533/1666666 [2:06:11<26:29, 214.01it/s]

finished frames 7959000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1326643/1666666 [2:06:11<26:28, 214.09it/s]

finished frames 7959600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1326731/1666666 [2:06:12<26:30, 213.72it/s]

finished frames 7960200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1326841/1666666 [2:06:12<26:25, 214.28it/s]

finished frames 7960800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1326929/1666666 [2:06:13<26:29, 213.77it/s]

finished frames 7961400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1327039/1666666 [2:06:13<27:12, 208.02it/s]

finished frames 7962000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1327127/1666666 [2:06:14<26:47, 211.26it/s]

finished frames 7962600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1327237/1666666 [2:06:14<26:37, 212.46it/s]

finished frames 7963200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1327325/1666666 [2:06:15<26:38, 212.33it/s]

finished frames 7963800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1327435/1666666 [2:06:15<26:38, 212.17it/s]

finished frames 7964400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1327523/1666666 [2:06:15<26:52, 210.33it/s]

finished frames 7965000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1327633/1666666 [2:06:16<26:19, 214.62it/s]

finished frames 7965600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1327743/1666666 [2:06:16<26:32, 212.78it/s]

finished frames 7966200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1327831/1666666 [2:06:17<26:37, 212.06it/s]

finished frames 7966800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1327941/1666666 [2:06:17<26:25, 213.60it/s]

finished frames 7967400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1328029/1666666 [2:06:18<27:10, 207.74it/s]

finished frames 7968000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1328137/1666666 [2:06:18<27:46, 203.16it/s]

finished frames 7968600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1328243/1666666 [2:06:19<28:18, 199.22it/s]

finished frames 7969200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1328328/1666666 [2:06:19<27:14, 206.94it/s]

finished frames 7969800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1328434/1666666 [2:06:20<26:57, 209.08it/s]

finished frames 7970400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1328542/1666666 [2:06:20<26:50, 209.95it/s]

finished frames 7971000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1328627/1666666 [2:06:21<26:53, 209.48it/s]

finished frames 7971600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1328735/1666666 [2:06:21<26:50, 209.79it/s]

finished frames 7972200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1328841/1666666 [2:06:22<27:03, 208.15it/s]

finished frames 7972800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1328925/1666666 [2:06:22<27:10, 207.15it/s]

finished frames 7973400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1329031/1666666 [2:06:23<27:41, 203.21it/s]

finished frames 7974000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1329138/1666666 [2:06:23<26:52, 209.31it/s]

finished frames 7974600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1329226/1666666 [2:06:24<26:34, 211.63it/s]

finished frames 7975200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1329336/1666666 [2:06:24<26:34, 211.53it/s]

finished frames 7975800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1329424/1666666 [2:06:25<26:29, 212.13it/s]

finished frames 7976400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1329534/1666666 [2:06:25<26:31, 211.89it/s]

finished frames 7977000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1329622/1666666 [2:06:25<26:33, 211.55it/s]

finished frames 7977600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1329732/1666666 [2:06:26<26:37, 210.88it/s]

finished frames 7978200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1329842/1666666 [2:06:27<26:33, 211.39it/s]

finished frames 7978800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1329930/1666666 [2:06:27<26:28, 212.00it/s]

finished frames 7979400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1330040/1666666 [2:06:27<26:59, 207.82it/s]

finished frames 7980000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1330128/1666666 [2:06:28<26:27, 211.93it/s]

finished frames 7980600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1330238/1666666 [2:06:28<26:20, 212.80it/s]

finished frames 7981200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1330326/1666666 [2:06:29<26:22, 212.60it/s]

finished frames 7981800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1330436/1666666 [2:06:29<28:30, 196.54it/s]

finished frames 7982400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1330523/1666666 [2:06:30<26:52, 208.44it/s]

finished frames 7983000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1330633/1666666 [2:06:30<26:20, 212.54it/s]

finished frames 7983600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1330743/1666666 [2:06:31<26:13, 213.55it/s]

finished frames 7984200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1330831/1666666 [2:06:31<26:13, 213.37it/s]

finished frames 7984800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1330941/1666666 [2:06:32<26:03, 214.66it/s]

finished frames 7985400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1331029/1666666 [2:06:32<26:32, 210.77it/s]

finished frames 7986000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1331139/1666666 [2:06:33<26:10, 213.58it/s]

finished frames 7986600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1331227/1666666 [2:06:33<26:06, 214.17it/s]

finished frames 7987200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1331337/1666666 [2:06:34<26:02, 214.58it/s]

finished frames 7987800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1331425/1666666 [2:06:34<26:03, 214.47it/s]

finished frames 7988400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1331535/1666666 [2:06:35<26:06, 213.97it/s]

finished frames 7989000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1331623/1666666 [2:06:35<25:57, 215.16it/s]

finished frames 7989600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1331733/1666666 [2:06:35<25:57, 215.05it/s]

finished frames 7990200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1331843/1666666 [2:06:36<26:00, 214.62it/s]

finished frames 7990800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1331931/1666666 [2:06:36<26:06, 213.65it/s]

finished frames 7991400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1332041/1666666 [2:06:37<26:46, 208.23it/s]

finished frames 7992000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1332125/1666666 [2:06:37<27:06, 205.67it/s]

finished frames 7992600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1332230/1666666 [2:06:38<27:09, 205.21it/s]

finished frames 7993200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1332337/1666666 [2:06:38<26:38, 209.12it/s]

finished frames 7993800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1332425/1666666 [2:06:39<26:21, 211.37it/s]

finished frames 7994400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1332535/1666666 [2:06:39<26:13, 212.39it/s]

finished frames 7995000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1332623/1666666 [2:06:40<26:12, 212.41it/s]

finished frames 7995600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1332733/1666666 [2:06:40<25:57, 214.41it/s]

finished frames 7996200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1332821/1666666 [2:06:41<28:08, 197.76it/s]

finished frames 7996800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1332931/1666666 [2:06:41<26:24, 210.63it/s]

finished frames 7997400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1333041/1666666 [2:06:42<26:45, 207.82it/s]

finished frames 7998000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1333129/1666666 [2:06:42<26:17, 211.40it/s]

finished frames 7998600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1333238/1666666 [2:06:43<26:25, 210.33it/s]

finished frames 7999200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|███████▉  | 1333325/1666666 [2:06:43<26:30, 209.64it/s]

finished frames 7999800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1333435/1666666 [2:06:44<26:27, 209.96it/s]

finished frames 8000400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1333522/1666666 [2:06:44<26:26, 210.01it/s]

finished frames 8001000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1333632/1666666 [2:06:44<26:25, 210.09it/s]

finished frames 8001600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1333742/1666666 [2:06:45<26:23, 210.30it/s]

finished frames 8002200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1333830/1666666 [2:06:45<26:27, 209.65it/s]

finished frames 8002800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1333937/1666666 [2:06:46<26:27, 209.55it/s]

finished frames 8003400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1334021/1666666 [2:06:46<28:21, 195.51it/s]

finished frames 8004000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1334126/1666666 [2:06:47<27:16, 203.20it/s]

finished frames 8004600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1334231/1666666 [2:06:47<27:00, 205.15it/s]

finished frames 8005200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1334336/1666666 [2:06:48<26:50, 206.34it/s]

finished frames 8005800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1334441/1666666 [2:06:48<26:49, 206.47it/s]

finished frames 8006400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1334525/1666666 [2:06:49<26:51, 206.12it/s]

finished frames 8007000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1334630/1666666 [2:06:49<26:49, 206.34it/s]

finished frames 8007600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1334737/1666666 [2:06:50<26:29, 208.87it/s]

finished frames 8008200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1334843/1666666 [2:06:50<26:22, 209.67it/s]

finished frames 8008800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1334928/1666666 [2:06:51<26:25, 209.25it/s]

finished frames 8009400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1335033/1666666 [2:06:51<27:04, 204.17it/s]

finished frames 8010000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1335141/1666666 [2:06:52<26:26, 208.97it/s]

finished frames 8010600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1335227/1666666 [2:06:52<27:16, 202.54it/s]

finished frames 8011200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1335334/1666666 [2:06:53<26:30, 208.30it/s]

finished frames 8011800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1335441/1666666 [2:06:53<26:20, 209.53it/s]

finished frames 8012400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1335525/1666666 [2:06:54<26:25, 208.80it/s]

finished frames 8013000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1335630/1666666 [2:06:54<26:54, 205.04it/s]

finished frames 8013600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1335739/1666666 [2:06:55<26:03, 211.62it/s]

finished frames 8014200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1335827/1666666 [2:06:55<26:01, 211.84it/s]

finished frames 8014800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1335937/1666666 [2:06:56<25:48, 213.61it/s]

finished frames 8015400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1336025/1666666 [2:06:56<26:26, 208.39it/s]

finished frames 8016000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1336135/1666666 [2:06:57<25:57, 212.16it/s]

finished frames 8016600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1336223/1666666 [2:06:57<25:55, 212.40it/s]

finished frames 8017200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1336333/1666666 [2:06:57<25:52, 212.77it/s]

finished frames 8017800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1336443/1666666 [2:06:58<25:49, 213.12it/s]

finished frames 8018400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1336531/1666666 [2:06:58<25:54, 212.38it/s]

finished frames 8019000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1336641/1666666 [2:06:59<25:49, 213.00it/s]

finished frames 8019600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1336729/1666666 [2:06:59<25:53, 212.36it/s]

finished frames 8020200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1336839/1666666 [2:07:00<25:50, 212.76it/s]

finished frames 8020800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1336927/1666666 [2:07:00<25:59, 211.40it/s]

finished frames 8021400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1337037/1666666 [2:07:01<26:25, 207.90it/s]

finished frames 8022000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1337125/1666666 [2:07:01<26:01, 211.09it/s]

finished frames 8022600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1337235/1666666 [2:07:02<25:51, 212.36it/s]

finished frames 8023200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1337323/1666666 [2:07:02<25:50, 212.46it/s]

finished frames 8023800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1337433/1666666 [2:07:03<25:43, 213.29it/s]

finished frames 8024400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1337521/1666666 [2:07:03<25:48, 212.54it/s]

finished frames 8025000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1337630/1666666 [2:07:04<26:26, 207.35it/s]

finished frames 8025600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1337740/1666666 [2:07:04<25:55, 211.49it/s]

finished frames 8026200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1337828/1666666 [2:07:05<25:55, 211.38it/s]

finished frames 8026800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1337938/1666666 [2:07:05<25:51, 211.81it/s]

finished frames 8027400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1338025/1666666 [2:07:05<26:27, 207.03it/s]

finished frames 8028000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1338134/1666666 [2:07:06<26:01, 210.45it/s]

finished frames 8028600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1338244/1666666 [2:07:07<25:45, 212.51it/s]

finished frames 8029200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1338332/1666666 [2:07:07<25:47, 212.23it/s]

finished frames 8029800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1338442/1666666 [2:07:07<25:45, 212.44it/s]

finished frames 8030400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1338530/1666666 [2:07:08<25:47, 212.01it/s]

finished frames 8031000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1338640/1666666 [2:07:08<25:45, 212.20it/s]

finished frames 8031600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1338728/1666666 [2:07:09<25:46, 212.05it/s]

finished frames 8032200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1338838/1666666 [2:07:09<25:46, 211.94it/s]

finished frames 8032800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1338926/1666666 [2:07:10<25:49, 211.50it/s]

finished frames 8033400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1339035/1666666 [2:07:10<26:47, 203.83it/s]

finished frames 8034000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1339143/1666666 [2:07:11<26:03, 209.50it/s]

finished frames 8034600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1339230/1666666 [2:07:11<26:09, 208.57it/s]

finished frames 8035200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1339338/1666666 [2:07:12<25:59, 209.88it/s]

finished frames 8035800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1339426/1666666 [2:07:12<25:53, 210.68it/s]

finished frames 8036400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1339534/1666666 [2:07:13<26:04, 209.03it/s]

finished frames 8037000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1339641/1666666 [2:07:13<25:58, 209.88it/s]

finished frames 8037600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1339727/1666666 [2:07:14<26:00, 209.46it/s]

finished frames 8038200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1339833/1666666 [2:07:14<26:02, 209.17it/s]

finished frames 8038800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1339940/1666666 [2:07:15<27:34, 197.44it/s]

finished frames 8039400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1340025/1666666 [2:07:15<29:14, 186.18it/s]

finished frames 8040000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1340130/1666666 [2:07:16<26:45, 203.42it/s]

finished frames 8040600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1340236/1666666 [2:07:16<26:16, 207.06it/s]

finished frames 8041200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1340341/1666666 [2:07:17<26:13, 207.39it/s]

finished frames 8041800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1340425/1666666 [2:07:17<26:26, 205.60it/s]

finished frames 8042400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1340530/1666666 [2:07:18<26:30, 205.04it/s]

finished frames 8043000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1340636/1666666 [2:07:18<26:10, 207.62it/s]

finished frames 8043600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1340744/1666666 [2:07:19<25:42, 211.26it/s]

finished frames 8044200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1340832/1666666 [2:07:19<25:38, 211.78it/s]

finished frames 8044800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1340942/1666666 [2:07:19<25:26, 213.38it/s]

finished frames 8045400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1341030/1666666 [2:07:20<26:05, 208.03it/s]

finished frames 8046000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1341138/1666666 [2:07:20<25:52, 209.71it/s]

finished frames 8046600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1341226/1666666 [2:07:21<25:40, 211.32it/s]

finished frames 8047200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1341336/1666666 [2:07:21<25:30, 212.51it/s]

finished frames 8047800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1341424/1666666 [2:07:22<25:36, 211.66it/s]

finished frames 8048400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1341534/1666666 [2:07:22<25:38, 211.31it/s]

finished frames 8049000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 80%|████████  | 1341622/1666666 [2:07:23<25:38, 211.34it/s]

finished frames 8049600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1341732/1666666 [2:07:23<25:32, 212.02it/s]

finished frames 8050200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1341842/1666666 [2:07:24<25:32, 211.97it/s]

finished frames 8050800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1341930/1666666 [2:07:24<25:32, 211.91it/s]

finished frames 8051400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1342040/1666666 [2:07:25<26:07, 207.14it/s]

finished frames 8052000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1342127/1666666 [2:07:25<25:44, 210.08it/s]

finished frames 8052600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1342237/1666666 [2:07:26<25:30, 212.04it/s]

finished frames 8053200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1342324/1666666 [2:07:26<26:20, 205.16it/s]

finished frames 8053800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1342434/1666666 [2:07:27<26:34, 203.30it/s]

finished frames 8054400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1342544/1666666 [2:07:27<25:30, 211.73it/s]

finished frames 8055000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1342632/1666666 [2:07:28<25:19, 213.22it/s]

finished frames 8055600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1342742/1666666 [2:07:28<25:18, 213.26it/s]

finished frames 8056200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1342830/1666666 [2:07:28<25:17, 213.34it/s]

finished frames 8056800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1342940/1666666 [2:07:29<25:16, 213.46it/s]

finished frames 8057400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1343027/1666666 [2:07:29<26:01, 207.24it/s]

finished frames 8058000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1343137/1666666 [2:07:30<25:21, 212.60it/s]

finished frames 8058600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1343225/1666666 [2:07:30<25:19, 212.79it/s]

finished frames 8059200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1343335/1666666 [2:07:31<25:05, 214.82it/s]

finished frames 8059800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1343423/1666666 [2:07:31<25:13, 213.59it/s]

finished frames 8060400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1343533/1666666 [2:07:32<25:14, 213.29it/s]

finished frames 8061000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1343643/1666666 [2:07:32<25:21, 212.29it/s]

finished frames 8061600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1343731/1666666 [2:07:33<25:26, 211.57it/s]

finished frames 8062200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1343841/1666666 [2:07:33<25:35, 210.22it/s]

finished frames 8062800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1343926/1666666 [2:07:34<25:46, 208.73it/s]

finished frames 8063400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1344031/1666666 [2:07:34<26:30, 202.85it/s]

finished frames 8064000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1344136/1666666 [2:07:35<25:53, 207.63it/s]

finished frames 8064600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1344242/1666666 [2:07:35<25:44, 208.75it/s]

finished frames 8065200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1344326/1666666 [2:07:36<25:47, 208.23it/s]

finished frames 8065800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1344432/1666666 [2:07:36<25:43, 208.74it/s]

finished frames 8066400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1344537/1666666 [2:07:37<25:44, 208.54it/s]

finished frames 8067000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1344621/1666666 [2:07:37<28:26, 188.71it/s]

finished frames 8067600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1344729/1666666 [2:07:38<25:46, 208.16it/s]

finished frames 8068200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1344838/1666666 [2:07:38<25:51, 207.47it/s]

finished frames 8068800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1344926/1666666 [2:07:38<25:24, 211.10it/s]

finished frames 8069400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1345036/1666666 [2:07:39<25:53, 207.08it/s]

finished frames 8070000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1345124/1666666 [2:07:39<25:27, 210.55it/s]

finished frames 8070600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1345234/1666666 [2:07:40<25:13, 212.32it/s]

finished frames 8071200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1345322/1666666 [2:07:40<25:12, 212.40it/s]

finished frames 8071800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1345432/1666666 [2:07:41<25:03, 213.69it/s]

finished frames 8072400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1345542/1666666 [2:07:41<25:06, 213.10it/s]

finished frames 8073000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1345630/1666666 [2:07:42<25:13, 212.07it/s]

finished frames 8073600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1345740/1666666 [2:07:42<25:09, 212.64it/s]

finished frames 8074200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1345828/1666666 [2:07:43<25:12, 212.12it/s]

finished frames 8074800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1345938/1666666 [2:07:43<25:06, 212.93it/s]

finished frames 8075400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1346025/1666666 [2:07:44<25:46, 207.30it/s]

finished frames 8076000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1346134/1666666 [2:07:44<25:15, 211.49it/s]

finished frames 8076600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1346222/1666666 [2:07:45<25:14, 211.58it/s]

finished frames 8077200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1346332/1666666 [2:07:45<25:03, 213.12it/s]

finished frames 8077800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1346442/1666666 [2:07:46<25:04, 212.89it/s]

finished frames 8078400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1346530/1666666 [2:07:46<25:03, 212.96it/s]

finished frames 8079000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1346640/1666666 [2:07:47<25:01, 213.08it/s]

finished frames 8079600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1346728/1666666 [2:07:47<25:28, 209.33it/s]

finished frames 8080200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1346838/1666666 [2:07:48<24:59, 213.28it/s]

finished frames 8080800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1346926/1666666 [2:07:48<24:55, 213.74it/s]

finished frames 8081400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1347036/1666666 [2:07:48<27:03, 196.91it/s]

finished frames 8082000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1347122/1666666 [2:07:49<27:47, 191.69it/s]

finished frames 8082600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1347232/1666666 [2:07:49<25:21, 209.90it/s]

finished frames 8083200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1347342/1666666 [2:07:50<24:55, 213.47it/s]

finished frames 8083800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1347430/1666666 [2:07:50<24:58, 212.99it/s]

finished frames 8084400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1347540/1666666 [2:07:51<24:57, 213.09it/s]

finished frames 8085000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1347628/1666666 [2:07:51<24:55, 213.38it/s]

finished frames 8085600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1347738/1666666 [2:07:52<24:48, 214.29it/s]

finished frames 8086200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1347826/1666666 [2:07:52<24:50, 213.92it/s]

finished frames 8086800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1347936/1666666 [2:07:53<24:47, 214.26it/s]

finished frames 8087400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1348023/1666666 [2:07:53<26:05, 203.58it/s]

finished frames 8088000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1348132/1666666 [2:07:54<24:40, 215.12it/s]

finished frames 8088600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1348242/1666666 [2:07:54<24:34, 215.88it/s]

finished frames 8089200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1348331/1666666 [2:07:55<24:49, 213.69it/s]

finished frames 8089800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1348441/1666666 [2:07:55<25:11, 210.47it/s]

finished frames 8090400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1348529/1666666 [2:07:56<24:33, 215.87it/s]

finished frames 8091000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1348641/1666666 [2:07:56<24:14, 218.67it/s]

finished frames 8091600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1348730/1666666 [2:07:56<24:09, 219.37it/s]

finished frames 8092200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1348840/1666666 [2:07:57<24:09, 219.19it/s]

finished frames 8092800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1348929/1666666 [2:07:57<24:10, 218.99it/s]

finished frames 8093400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1349020/1666666 [2:07:58<24:49, 213.24it/s]

finished frames 8094000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1349131/1666666 [2:07:58<24:25, 216.64it/s]

finished frames 8094600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1349241/1666666 [2:07:59<24:51, 212.78it/s]

finished frames 8095200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1349329/1666666 [2:07:59<24:38, 214.66it/s]

finished frames 8095800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1349443/1666666 [2:08:00<24:03, 219.70it/s]

finished frames 8096400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1349535/1666666 [2:08:00<23:57, 220.68it/s]

finished frames 8097000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1349624/1666666 [2:08:01<24:16, 217.62it/s]

finished frames 8097600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1349734/1666666 [2:08:01<24:20, 217.04it/s]

finished frames 8098200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1349822/1666666 [2:08:01<24:44, 213.50it/s]

finished frames 8098800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1349932/1666666 [2:08:02<24:50, 212.55it/s]

finished frames 8099400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1350020/1666666 [2:08:02<25:43, 205.18it/s]

finished frames 8100000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1350130/1666666 [2:08:03<24:42, 213.50it/s]

finished frames 8100600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1350240/1666666 [2:08:03<24:34, 214.66it/s]

finished frames 8101200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1350328/1666666 [2:08:04<24:37, 214.14it/s]

finished frames 8101800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1350438/1666666 [2:08:04<24:33, 214.66it/s]

finished frames 8102400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1350526/1666666 [2:08:05<24:33, 214.62it/s]

finished frames 8103000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1350636/1666666 [2:08:05<24:34, 214.38it/s]

finished frames 8103600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1350724/1666666 [2:08:06<24:35, 214.20it/s]

finished frames 8104200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1350834/1666666 [2:08:06<24:32, 214.46it/s]

finished frames 8104800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1350944/1666666 [2:08:07<24:27, 215.19it/s]

finished frames 8105400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1351032/1666666 [2:08:07<25:06, 209.54it/s]

finished frames 8106000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1351142/1666666 [2:08:08<24:44, 212.59it/s]

finished frames 8106600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1351230/1666666 [2:08:08<24:41, 212.89it/s]

finished frames 8107200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1351340/1666666 [2:08:09<24:36, 213.57it/s]

finished frames 8107800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1351428/1666666 [2:08:09<24:35, 213.59it/s]

finished frames 8108400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1351538/1666666 [2:08:10<24:36, 213.40it/s]

finished frames 8109000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1351626/1666666 [2:08:10<24:35, 213.50it/s]

finished frames 8109600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1351736/1666666 [2:08:10<24:37, 213.14it/s]

finished frames 8110200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1351824/1666666 [2:08:11<26:44, 196.26it/s]

finished frames 8110800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1351934/1666666 [2:08:11<24:52, 210.92it/s]

finished frames 8111400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1352021/1666666 [2:08:12<26:09, 200.52it/s]

finished frames 8112000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1352130/1666666 [2:08:12<24:52, 210.69it/s]

finished frames 8112600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1352240/1666666 [2:08:13<24:33, 213.41it/s]

finished frames 8113200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1352328/1666666 [2:08:13<24:31, 213.62it/s]

finished frames 8113800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1352438/1666666 [2:08:14<24:28, 214.04it/s]

finished frames 8114400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1352526/1666666 [2:08:14<24:50, 210.73it/s]

finished frames 8115000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1352634/1666666 [2:08:15<24:58, 209.63it/s]

finished frames 8115600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1352741/1666666 [2:08:15<25:04, 208.71it/s]

finished frames 8116200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1352826/1666666 [2:08:16<25:02, 208.86it/s]

finished frames 8116800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1352931/1666666 [2:08:16<25:04, 208.60it/s]

finished frames 8117400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1353037/1666666 [2:08:17<26:29, 197.28it/s]

finished frames 8118000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1353122/1666666 [2:08:17<25:23, 205.87it/s]

finished frames 8118600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1353228/1666666 [2:08:18<25:10, 207.51it/s]

finished frames 8119200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1353336/1666666 [2:08:18<24:54, 209.67it/s]

finished frames 8119800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1353443/1666666 [2:08:19<24:56, 209.28it/s]

finished frames 8120400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1353527/1666666 [2:08:19<24:58, 208.92it/s]

finished frames 8121000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1353634/1666666 [2:08:20<24:55, 209.34it/s]

finished frames 8121600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1353742/1666666 [2:08:20<24:48, 210.24it/s]

finished frames 8122200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1353829/1666666 [2:08:20<24:50, 209.85it/s]

finished frames 8122800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1353935/1666666 [2:08:21<24:53, 209.42it/s]

finished frames 8123400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1354020/1666666 [2:08:21<26:40, 195.35it/s]

finished frames 8124000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████  | 1354127/1666666 [2:08:22<25:09, 207.09it/s]

finished frames 8124600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████▏ | 1354234/1666666 [2:08:22<25:26, 204.70it/s]

finished frames 8125200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████▏ | 1354343/1666666 [2:08:23<25:37, 203.16it/s]

finished frames 8125800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████▏ | 1354430/1666666 [2:08:23<24:46, 210.06it/s]

finished frames 8126400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████▏ | 1354540/1666666 [2:08:24<24:25, 212.91it/s]

finished frames 8127000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████▏ | 1354628/1666666 [2:08:24<24:31, 212.07it/s]

finished frames 8127600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████▏ | 1354738/1666666 [2:08:25<24:29, 212.33it/s]

finished frames 8128200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████▏ | 1354826/1666666 [2:08:25<24:29, 212.21it/s]

finished frames 8128800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████▏ | 1354936/1666666 [2:08:26<24:24, 212.81it/s]

finished frames 8129400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████▏ | 1355023/1666666 [2:08:26<25:26, 204.16it/s]

finished frames 8130000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████▏ | 1355128/1666666 [2:08:27<25:14, 205.65it/s]

finished frames 8130600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████▏ | 1355233/1666666 [2:08:27<25:06, 206.66it/s]

finished frames 8131200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████▏ | 1355338/1666666 [2:08:28<25:08, 206.45it/s]

finished frames 8131800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████▏ | 1355422/1666666 [2:08:28<25:10, 206.06it/s]

finished frames 8132400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████▏ | 1355527/1666666 [2:08:29<25:00, 207.35it/s]

finished frames 8133000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████▏ | 1355632/1666666 [2:08:29<25:05, 206.66it/s]

finished frames 8133600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████▏ | 1355737/1666666 [2:08:30<25:02, 206.95it/s]

finished frames 8134200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████▏ | 1355842/1666666 [2:08:30<25:05, 206.46it/s]

finished frames 8134800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████▏ | 1355926/1666666 [2:08:31<25:10, 205.69it/s]

finished frames 8135400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████▏ | 1356031/1666666 [2:08:31<25:41, 201.47it/s]

finished frames 8136000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████▏ | 1356136/1666666 [2:08:32<25:11, 205.47it/s]

finished frames 8136600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████▏ | 1356241/1666666 [2:08:32<24:56, 207.48it/s]

finished frames 8137200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████▏ | 1356325/1666666 [2:08:33<24:56, 207.34it/s]

finished frames 8137800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████▏ | 1356431/1666666 [2:08:33<24:52, 207.85it/s]

finished frames 8138400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████▏ | 1356536/1666666 [2:08:34<24:52, 207.78it/s]

finished frames 8139000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████▏ | 1356641/1666666 [2:08:34<24:52, 207.79it/s]

finished frames 8139600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████▏ | 1356725/1666666 [2:08:34<24:56, 207.10it/s]

finished frames 8140200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████▏ | 1356830/1666666 [2:08:35<24:53, 207.42it/s]

finished frames 8140800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████▏ | 1356935/1666666 [2:08:36<24:49, 207.99it/s]

finished frames 8141400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████▏ | 1357040/1666666 [2:08:36<25:21, 203.48it/s]

finished frames 8142000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████▏ | 1357125/1666666 [2:08:36<24:58, 206.58it/s]

finished frames 8142600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████▏ | 1357230/1666666 [2:08:37<25:17, 203.87it/s]

finished frames 8143200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████▏ | 1357335/1666666 [2:08:37<25:42, 200.49it/s]

finished frames 8143800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████▏ | 1357440/1666666 [2:08:38<25:47, 199.78it/s]

finished frames 8144400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████▏ | 1357541/1666666 [2:08:38<25:41, 200.53it/s]

finished frames 8145000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████▏ | 1357625/1666666 [2:08:39<25:39, 200.71it/s]

finished frames 8145600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████▏ | 1357730/1666666 [2:08:39<25:00, 205.89it/s]

finished frames 8146200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████▏ | 1357835/1666666 [2:08:40<24:46, 207.79it/s]

finished frames 8146800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████▏ | 1357941/1666666 [2:08:40<24:39, 208.71it/s]

finished frames 8147400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████▏ | 1358025/1666666 [2:08:41<25:24, 202.44it/s]

finished frames 8148000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████▏ | 1358131/1666666 [2:08:41<24:49, 207.10it/s]

finished frames 8148600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 81%|████████▏ | 1358236/1666666 [2:08:42<24:47, 207.40it/s]

finished frames 8149200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1358341/1666666 [2:08:42<24:49, 207.04it/s]

finished frames 8149800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1358425/1666666 [2:08:43<24:53, 206.42it/s]

finished frames 8150400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1358530/1666666 [2:08:43<24:49, 206.93it/s]

finished frames 8151000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1358635/1666666 [2:08:44<24:42, 207.71it/s]

finished frames 8151600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1358741/1666666 [2:08:44<24:32, 209.10it/s]

finished frames 8152200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1358829/1666666 [2:08:45<26:22, 194.56it/s]

finished frames 8152800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1358938/1666666 [2:08:45<24:29, 209.43it/s]

finished frames 8153400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1359025/1666666 [2:08:46<25:03, 204.55it/s]

finished frames 8154000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1359130/1666666 [2:08:46<24:51, 206.24it/s]

finished frames 8154600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1359235/1666666 [2:08:47<24:53, 205.89it/s]

finished frames 8155200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1359340/1666666 [2:08:47<24:49, 206.33it/s]

finished frames 8155800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1359424/1666666 [2:08:48<24:48, 206.35it/s]

finished frames 8156400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1359529/1666666 [2:08:48<24:47, 206.47it/s]

finished frames 8157000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1359634/1666666 [2:08:49<24:40, 207.35it/s]

finished frames 8157600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1359740/1666666 [2:08:49<24:34, 208.21it/s]

finished frames 8158200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1359824/1666666 [2:08:50<24:37, 207.62it/s]

finished frames 8158800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1359929/1666666 [2:08:50<24:36, 207.75it/s]

finished frames 8159400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1360034/1666666 [2:08:51<25:10, 203.07it/s]

finished frames 8160000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1360139/1666666 [2:08:51<24:44, 206.50it/s]

finished frames 8160600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1360223/1666666 [2:08:51<24:44, 206.48it/s]

finished frames 8161200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1360328/1666666 [2:08:52<24:33, 207.90it/s]

finished frames 8161800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1360433/1666666 [2:08:53<24:31, 208.11it/s]

finished frames 8162400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1360538/1666666 [2:08:53<24:30, 208.13it/s]

finished frames 8163000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1360622/1666666 [2:08:53<24:35, 207.36it/s]

finished frames 8163600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1360727/1666666 [2:08:54<24:26, 208.68it/s]

finished frames 8164200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1360832/1666666 [2:08:54<24:28, 208.24it/s]

finished frames 8164800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1360937/1666666 [2:08:55<24:39, 206.61it/s]

finished frames 8165400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1361021/1666666 [2:08:55<25:28, 199.99it/s]

finished frames 8166000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1361126/1666666 [2:08:56<24:40, 206.37it/s]

finished frames 8166600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1361231/1666666 [2:08:56<24:39, 206.45it/s]

finished frames 8167200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1361336/1666666 [2:08:57<25:20, 200.83it/s]

finished frames 8167800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1361442/1666666 [2:08:57<24:39, 206.28it/s]

finished frames 8168400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1361526/1666666 [2:08:58<24:43, 205.76it/s]

finished frames 8169000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1361631/1666666 [2:08:58<24:32, 207.16it/s]

finished frames 8169600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1361736/1666666 [2:08:59<24:30, 207.36it/s]

finished frames 8170200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1361841/1666666 [2:08:59<24:26, 207.79it/s]

finished frames 8170800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1361925/1666666 [2:09:00<24:26, 207.80it/s]

finished frames 8171400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1362031/1666666 [2:09:00<25:03, 202.67it/s]

finished frames 8172000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1362139/1666666 [2:09:01<24:11, 209.84it/s]

finished frames 8172600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1362227/1666666 [2:09:01<24:01, 211.26it/s]

finished frames 8173200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1362337/1666666 [2:09:02<23:56, 211.83it/s]

finished frames 8173800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1362425/1666666 [2:09:02<23:57, 211.59it/s]

finished frames 8174400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1362535/1666666 [2:09:03<23:56, 211.65it/s]

finished frames 8175000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1362623/1666666 [2:09:03<23:54, 211.88it/s]

finished frames 8175600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1362733/1666666 [2:09:04<23:49, 212.64it/s]

finished frames 8176200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1362843/1666666 [2:09:04<23:51, 212.23it/s]

finished frames 8176800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1362931/1666666 [2:09:05<23:57, 211.30it/s]

finished frames 8177400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1363040/1666666 [2:09:05<24:32, 206.26it/s]

finished frames 8178000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1363127/1666666 [2:09:05<24:04, 210.09it/s]

finished frames 8178600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1363237/1666666 [2:09:06<23:52, 211.82it/s]

finished frames 8179200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1363325/1666666 [2:09:06<23:52, 211.83it/s]

finished frames 8179800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1363435/1666666 [2:09:07<23:55, 211.19it/s]

finished frames 8180400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1363523/1666666 [2:09:07<25:51, 195.33it/s]

finished frames 8181000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1363611/1666666 [2:09:08<24:19, 207.60it/s]

finished frames 8181600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1363741/1666666 [2:09:08<24:37, 205.03it/s]

finished frames 8182200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1363829/1666666 [2:09:09<24:03, 209.81it/s]

finished frames 8182800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1363939/1666666 [2:09:09<23:53, 211.20it/s]

finished frames 8183400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1364026/1666666 [2:09:10<24:26, 206.43it/s]

finished frames 8184000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1364135/1666666 [2:09:10<23:56, 210.55it/s]

finished frames 8184600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1364223/1666666 [2:09:11<23:41, 212.81it/s]

finished frames 8185200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1364333/1666666 [2:09:11<23:29, 214.50it/s]

finished frames 8185800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1364443/1666666 [2:09:12<23:34, 213.68it/s]

finished frames 8186400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1364531/1666666 [2:09:12<23:43, 212.25it/s]

finished frames 8187000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1364641/1666666 [2:09:13<23:45, 211.93it/s]

finished frames 8187600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1364729/1666666 [2:09:13<23:45, 211.87it/s]

finished frames 8188200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1364839/1666666 [2:09:14<23:40, 212.43it/s]

finished frames 8188800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1364927/1666666 [2:09:14<23:39, 212.63it/s]

finished frames 8189400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1365037/1666666 [2:09:15<24:14, 207.44it/s]

finished frames 8190000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1365124/1666666 [2:09:15<24:09, 208.08it/s]

finished frames 8190600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1365234/1666666 [2:09:16<23:51, 210.56it/s]

finished frames 8191200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1365322/1666666 [2:09:16<23:49, 210.84it/s]

finished frames 8191800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1365432/1666666 [2:09:16<23:40, 212.03it/s]

finished frames 8192400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1365542/1666666 [2:09:17<23:43, 211.49it/s]

finished frames 8193000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1365630/1666666 [2:09:17<23:45, 211.18it/s]

finished frames 8193600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1365740/1666666 [2:09:18<23:42, 211.50it/s]

finished frames 8194200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1365828/1666666 [2:09:18<23:40, 211.73it/s]

finished frames 8194800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1365938/1666666 [2:09:19<23:40, 211.66it/s]

finished frames 8195400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1366026/1666666 [2:09:19<25:50, 193.92it/s]

finished frames 8196000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1366135/1666666 [2:09:20<23:59, 208.74it/s]

finished frames 8196600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1366223/1666666 [2:09:20<23:50, 209.96it/s]

finished frames 8197200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1366333/1666666 [2:09:21<23:31, 212.84it/s]

finished frames 8197800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1366443/1666666 [2:09:21<23:15, 215.16it/s]

finished frames 8198400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1366531/1666666 [2:09:22<23:16, 214.93it/s]

finished frames 8199000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1366641/1666666 [2:09:22<23:17, 214.65it/s]

finished frames 8199600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1366729/1666666 [2:09:23<23:21, 213.96it/s]

finished frames 8200200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1366839/1666666 [2:09:23<23:23, 213.59it/s]

finished frames 8200800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1366927/1666666 [2:09:24<23:24, 213.45it/s]

finished frames 8201400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1367037/1666666 [2:09:24<24:01, 207.85it/s]

finished frames 8202000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1367125/1666666 [2:09:24<23:29, 212.50it/s]

finished frames 8202600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1367235/1666666 [2:09:25<23:21, 213.66it/s]

finished frames 8203200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1367323/1666666 [2:09:25<23:16, 214.34it/s]

finished frames 8203800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1367433/1666666 [2:09:26<23:21, 213.45it/s]

finished frames 8204400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1367543/1666666 [2:09:26<23:20, 213.60it/s]

finished frames 8205000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1367631/1666666 [2:09:27<23:15, 214.28it/s]

finished frames 8205600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1367741/1666666 [2:09:27<23:12, 214.67it/s]

finished frames 8206200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1367829/1666666 [2:09:28<23:15, 214.13it/s]

finished frames 8206800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1367939/1666666 [2:09:28<23:09, 215.00it/s]

finished frames 8207400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1368027/1666666 [2:09:29<23:47, 209.16it/s]

finished frames 8208000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1368137/1666666 [2:09:29<23:19, 213.31it/s]

finished frames 8208600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1368225/1666666 [2:09:30<23:15, 213.86it/s]

finished frames 8209200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1368335/1666666 [2:09:30<24:31, 202.76it/s]

finished frames 8209800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1368423/1666666 [2:09:31<24:59, 198.89it/s]

finished frames 8210400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1368533/1666666 [2:09:31<23:22, 212.62it/s]

finished frames 8211000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1368643/1666666 [2:09:32<23:11, 214.10it/s]

finished frames 8211600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1368731/1666666 [2:09:32<23:12, 213.95it/s]

finished frames 8212200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1368841/1666666 [2:09:33<23:08, 214.51it/s]

finished frames 8212800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1368929/1666666 [2:09:33<23:14, 213.58it/s]

finished frames 8213400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1369039/1666666 [2:09:34<23:35, 210.25it/s]

finished frames 8214000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1369127/1666666 [2:09:34<23:13, 213.45it/s]

finished frames 8214600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1369237/1666666 [2:09:34<23:09, 214.02it/s]

finished frames 8215200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1369325/1666666 [2:09:35<23:08, 214.21it/s]

finished frames 8215800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1369435/1666666 [2:09:35<23:03, 214.83it/s]

finished frames 8216400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1369523/1666666 [2:09:36<23:14, 213.03it/s]

finished frames 8217000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1369633/1666666 [2:09:36<23:06, 214.29it/s]

finished frames 8217600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1369743/1666666 [2:09:37<23:08, 213.86it/s]

finished frames 8218200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1369831/1666666 [2:09:37<23:05, 214.24it/s]

finished frames 8218800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1369941/1666666 [2:09:38<23:06, 213.97it/s]

finished frames 8219400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1370029/1666666 [2:09:38<23:35, 209.58it/s]

finished frames 8220000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1370139/1666666 [2:09:39<23:05, 214.02it/s]

finished frames 8220600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1370227/1666666 [2:09:39<23:05, 214.02it/s]

finished frames 8221200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1370337/1666666 [2:09:40<23:05, 213.87it/s]

finished frames 8221800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1370425/1666666 [2:09:40<23:05, 213.86it/s]

finished frames 8222400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1370535/1666666 [2:09:41<22:50, 216.10it/s]

finished frames 8223000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1370623/1666666 [2:09:41<22:56, 215.06it/s]

finished frames 8223600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1370734/1666666 [2:09:41<22:54, 215.30it/s]

finished frames 8224200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1370844/1666666 [2:09:42<23:52, 206.50it/s]

finished frames 8224800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1370932/1666666 [2:09:42<23:14, 212.09it/s]

finished frames 8225400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1371042/1666666 [2:09:43<23:31, 209.44it/s]

finished frames 8226000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1371130/1666666 [2:09:43<23:17, 211.40it/s]

finished frames 8226600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1371240/1666666 [2:09:44<23:08, 212.79it/s]

finished frames 8227200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1371328/1666666 [2:09:44<23:05, 213.09it/s]

finished frames 8227800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1371438/1666666 [2:09:45<23:09, 212.46it/s]

finished frames 8228400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1371526/1666666 [2:09:45<23:08, 212.54it/s]

finished frames 8229000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1371636/1666666 [2:09:46<23:10, 212.16it/s]

finished frames 8229600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1371724/1666666 [2:09:46<23:13, 211.71it/s]

finished frames 8230200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1371834/1666666 [2:09:47<23:49, 206.24it/s]

finished frames 8230800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1371943/1666666 [2:09:47<23:15, 211.13it/s]

finished frames 8231400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1372031/1666666 [2:09:48<23:38, 207.67it/s]

finished frames 8232000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1372141/1666666 [2:09:48<23:08, 212.12it/s]

finished frames 8232600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1372229/1666666 [2:09:49<23:00, 213.22it/s]

finished frames 8233200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1372339/1666666 [2:09:49<22:50, 214.72it/s]

finished frames 8233800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1372427/1666666 [2:09:49<23:03, 212.69it/s]

finished frames 8234400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1372537/1666666 [2:09:50<23:02, 212.70it/s]

finished frames 8235000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1372625/1666666 [2:09:50<23:00, 212.95it/s]

finished frames 8235600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1372735/1666666 [2:09:51<22:59, 213.05it/s]

finished frames 8236200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1372823/1666666 [2:09:51<23:15, 210.55it/s]

finished frames 8236800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1372929/1666666 [2:09:52<23:35, 207.52it/s]

finished frames 8237400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1373035/1666666 [2:09:52<24:01, 203.74it/s]

finished frames 8238000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1373140/1666666 [2:09:53<23:39, 206.83it/s]

finished frames 8238600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1373224/1666666 [2:09:53<24:08, 202.55it/s]

finished frames 8239200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1373332/1666666 [2:09:54<23:15, 210.22it/s]

finished frames 8239800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1373442/1666666 [2:09:54<23:00, 212.33it/s]

finished frames 8240400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1373530/1666666 [2:09:55<23:10, 210.86it/s]

finished frames 8241000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1373638/1666666 [2:09:55<23:17, 209.68it/s]

finished frames 8241600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1373726/1666666 [2:09:56<22:59, 212.38it/s]

finished frames 8242200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1373836/1666666 [2:09:56<22:43, 214.82it/s]

finished frames 8242800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1373924/1666666 [2:09:57<22:45, 214.35it/s]

finished frames 8243400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1374034/1666666 [2:09:57<23:25, 208.17it/s]

finished frames 8244000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1374144/1666666 [2:09:58<22:49, 213.62it/s]

finished frames 8244600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1374232/1666666 [2:09:58<23:02, 211.55it/s]

finished frames 8245200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1374342/1666666 [2:09:59<22:35, 215.64it/s]

finished frames 8245800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1374430/1666666 [2:09:59<23:03, 211.19it/s]

finished frames 8246400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1374540/1666666 [2:09:59<22:37, 215.12it/s]

finished frames 8247000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1374630/1666666 [2:10:00<22:14, 218.79it/s]

finished frames 8247600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1374722/1666666 [2:10:00<22:06, 220.12it/s]

finished frames 8248200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1374833/1666666 [2:10:01<22:32, 215.83it/s]

finished frames 8248800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 82%|████████▏ | 1374943/1666666 [2:10:01<22:30, 215.97it/s]

finished frames 8249400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1375030/1666666 [2:10:02<23:32, 206.50it/s]

finished frames 8250000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1375138/1666666 [2:10:02<23:12, 209.42it/s]

finished frames 8250600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1375226/1666666 [2:10:03<23:03, 210.61it/s]

finished frames 8251200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1375336/1666666 [2:10:03<23:00, 210.96it/s]

finished frames 8251800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1375424/1666666 [2:10:04<22:48, 212.85it/s]

finished frames 8252400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1375534/1666666 [2:10:04<22:36, 214.54it/s]

finished frames 8253000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1375644/1666666 [2:10:05<22:36, 214.52it/s]

finished frames 8253600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1375732/1666666 [2:10:05<22:38, 214.20it/s]

finished frames 8254200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1375842/1666666 [2:10:06<22:34, 214.77it/s]

finished frames 8254800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1375930/1666666 [2:10:06<22:38, 214.06it/s]

finished frames 8255400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1376040/1666666 [2:10:07<22:59, 210.63it/s]

finished frames 8256000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1376128/1666666 [2:10:07<22:42, 213.28it/s]

finished frames 8256600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1376238/1666666 [2:10:07<22:34, 214.34it/s]

finished frames 8257200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1376326/1666666 [2:10:08<22:35, 214.22it/s]

finished frames 8257800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1376436/1666666 [2:10:08<22:33, 214.48it/s]

finished frames 8258400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1376524/1666666 [2:10:09<22:34, 214.28it/s]

finished frames 8259000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1376634/1666666 [2:10:09<22:32, 214.37it/s]

finished frames 8259600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1376722/1666666 [2:10:10<22:54, 210.99it/s]

finished frames 8260200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1376828/1666666 [2:10:10<23:18, 207.25it/s]

finished frames 8260800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1376937/1666666 [2:10:11<22:52, 211.09it/s]

finished frames 8261400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1377025/1666666 [2:10:11<23:21, 206.64it/s]

finished frames 8262000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1377135/1666666 [2:10:12<22:56, 210.37it/s]

finished frames 8262600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1377223/1666666 [2:10:12<22:51, 211.09it/s]

finished frames 8263200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1377333/1666666 [2:10:13<22:55, 210.41it/s]

finished frames 8263800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1377442/1666666 [2:10:13<22:54, 210.47it/s]

finished frames 8264400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1377530/1666666 [2:10:14<22:54, 210.32it/s]

finished frames 8265000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1377640/1666666 [2:10:14<22:51, 210.81it/s]

finished frames 8265600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1377728/1666666 [2:10:15<22:54, 210.15it/s]

finished frames 8266200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1377837/1666666 [2:10:15<22:51, 210.67it/s]

finished frames 8266800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1377923/1666666 [2:10:15<23:01, 208.95it/s]

finished frames 8267400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1378029/1666666 [2:10:16<23:39, 203.33it/s]

finished frames 8268000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1378135/1666666 [2:10:16<23:06, 208.16it/s]

finished frames 8268600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1378240/1666666 [2:10:17<23:01, 208.85it/s]

finished frames 8269200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1378324/1666666 [2:10:17<23:02, 208.61it/s]

finished frames 8269800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1378431/1666666 [2:10:18<22:58, 209.04it/s]

finished frames 8270400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1378537/1666666 [2:10:18<22:57, 209.11it/s]

finished frames 8271000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1378643/1666666 [2:10:19<22:56, 209.25it/s]

finished frames 8271600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1378728/1666666 [2:10:19<22:59, 208.75it/s]

finished frames 8272200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1378833/1666666 [2:10:20<22:58, 208.77it/s]

finished frames 8272800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1378939/1666666 [2:10:20<22:57, 208.92it/s]

finished frames 8273400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1379023/1666666 [2:10:21<23:34, 203.37it/s]

finished frames 8274000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1379128/1666666 [2:10:21<23:05, 207.57it/s]

finished frames 8274600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1379234/1666666 [2:10:22<22:57, 208.60it/s]

finished frames 8275200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1379340/1666666 [2:10:22<22:54, 209.07it/s]

finished frames 8275800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1379424/1666666 [2:10:23<22:57, 208.48it/s]

finished frames 8276400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1379529/1666666 [2:10:23<23:01, 207.81it/s]

finished frames 8277000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1379635/1666666 [2:10:24<22:50, 209.45it/s]

finished frames 8277600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1379743/1666666 [2:10:24<22:49, 209.49it/s]

finished frames 8278200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1379831/1666666 [2:10:25<22:51, 209.07it/s]

finished frames 8278800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1379937/1666666 [2:10:25<22:49, 209.38it/s]

finished frames 8279400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1380022/1666666 [2:10:26<23:23, 204.17it/s]

finished frames 8280000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1380129/1666666 [2:10:26<22:52, 208.74it/s]

finished frames 8280600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1380234/1666666 [2:10:27<22:54, 208.32it/s]

finished frames 8281200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1380339/1666666 [2:10:27<23:54, 199.58it/s]

finished frames 8281800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1380423/1666666 [2:10:27<23:10, 205.79it/s]

finished frames 8282400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1380530/1666666 [2:10:28<22:54, 208.13it/s]

finished frames 8283000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1380635/1666666 [2:10:28<22:58, 207.48it/s]

finished frames 8283600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1380742/1666666 [2:10:29<22:33, 211.33it/s]

finished frames 8284200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1380830/1666666 [2:10:29<22:26, 212.21it/s]

finished frames 8284800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1380940/1666666 [2:10:30<22:25, 212.43it/s]

finished frames 8285400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1381028/1666666 [2:10:30<22:54, 207.88it/s]

finished frames 8286000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1381136/1666666 [2:10:31<22:30, 211.41it/s]

finished frames 8286600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1381224/1666666 [2:10:31<22:21, 212.75it/s]

finished frames 8287200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1381334/1666666 [2:10:32<22:12, 214.13it/s]

finished frames 8287800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1381444/1666666 [2:10:32<22:11, 214.13it/s]

finished frames 8288400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1381532/1666666 [2:10:33<22:14, 213.70it/s]

finished frames 8289000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1381642/1666666 [2:10:33<22:13, 213.69it/s]

finished frames 8289600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1381730/1666666 [2:10:34<22:15, 213.28it/s]

finished frames 8290200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1381840/1666666 [2:10:34<22:13, 213.65it/s]

finished frames 8290800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1381928/1666666 [2:10:35<22:15, 213.23it/s]

finished frames 8291400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1382037/1666666 [2:10:35<23:02, 205.86it/s]

finished frames 8292000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1382123/1666666 [2:10:36<22:44, 208.57it/s]

finished frames 8292600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1382231/1666666 [2:10:36<22:33, 210.08it/s]

finished frames 8293200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1382338/1666666 [2:10:37<22:42, 208.68it/s]

finished frames 8293800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1382422/1666666 [2:10:37<22:46, 208.03it/s]

finished frames 8294400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1382528/1666666 [2:10:37<22:36, 209.48it/s]

finished frames 8295000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1382634/1666666 [2:10:38<23:07, 204.78it/s]

finished frames 8295600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1382740/1666666 [2:10:39<23:23, 202.27it/s]

finished frames 8296200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1382825/1666666 [2:10:39<22:50, 207.04it/s]

finished frames 8296800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1382931/1666666 [2:10:39<22:34, 209.53it/s]

finished frames 8297400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1383036/1666666 [2:10:40<23:14, 203.37it/s]

finished frames 8298000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1383142/1666666 [2:10:40<22:43, 207.95it/s]

finished frames 8298600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1383226/1666666 [2:10:41<22:42, 208.06it/s]

finished frames 8299200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1383331/1666666 [2:10:41<22:39, 208.47it/s]

finished frames 8299800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1383437/1666666 [2:10:42<22:34, 209.09it/s]

finished frames 8300400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1383543/1666666 [2:10:42<22:29, 209.86it/s]

finished frames 8301000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1383627/1666666 [2:10:43<22:35, 208.83it/s]

finished frames 8301600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1383732/1666666 [2:10:43<22:36, 208.58it/s]

finished frames 8302200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1383837/1666666 [2:10:44<22:35, 208.67it/s]

finished frames 8302800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1383943/1666666 [2:10:44<22:33, 208.95it/s]

finished frames 8303400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1384027/1666666 [2:10:45<23:17, 202.29it/s]

finished frames 8304000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1384137/1666666 [2:10:45<22:04, 213.25it/s]

finished frames 8304600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1384225/1666666 [2:10:46<22:01, 213.68it/s]

finished frames 8305200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1384335/1666666 [2:10:46<21:57, 214.23it/s]

finished frames 8305800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1384423/1666666 [2:10:47<21:56, 214.33it/s]

finished frames 8306400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1384533/1666666 [2:10:47<21:49, 215.44it/s]

finished frames 8307000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1384643/1666666 [2:10:48<21:43, 216.32it/s]

finished frames 8307600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1384731/1666666 [2:10:48<21:49, 215.24it/s]

finished frames 8308200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1384841/1666666 [2:10:49<21:55, 214.25it/s]

finished frames 8308800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1384928/1666666 [2:10:49<23:25, 200.46it/s]

finished frames 8309400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1385034/1666666 [2:10:49<25:03, 187.27it/s]

finished frames 8310000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1385139/1666666 [2:10:50<22:56, 204.56it/s]

finished frames 8310600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1385225/1666666 [2:10:50<22:32, 208.03it/s]

finished frames 8311200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1385332/1666666 [2:10:51<22:25, 209.11it/s]

finished frames 8311800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1385440/1666666 [2:10:51<22:18, 210.18it/s]

finished frames 8312400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1385526/1666666 [2:10:52<22:18, 209.99it/s]

finished frames 8313000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1385635/1666666 [2:10:52<22:18, 209.92it/s]

finished frames 8313600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1385741/1666666 [2:10:53<22:21, 209.42it/s]

finished frames 8314200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1385826/1666666 [2:10:53<22:24, 208.86it/s]

finished frames 8314800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1385932/1666666 [2:10:54<22:22, 209.12it/s]

finished frames 8315400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1386038/1666666 [2:10:54<22:51, 204.59it/s]

finished frames 8316000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1386122/1666666 [2:10:55<22:32, 207.39it/s]

finished frames 8316600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1386227/1666666 [2:10:55<22:41, 206.01it/s]

finished frames 8317200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1386332/1666666 [2:10:56<22:33, 207.09it/s]

finished frames 8317800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1386437/1666666 [2:10:56<22:39, 206.08it/s]

finished frames 8318400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1386542/1666666 [2:10:57<22:34, 206.86it/s]

finished frames 8319000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1386626/1666666 [2:10:57<22:37, 206.28it/s]

finished frames 8319600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1386731/1666666 [2:10:58<22:38, 206.07it/s]

finished frames 8320200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1386836/1666666 [2:10:58<22:34, 206.58it/s]

finished frames 8320800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1386941/1666666 [2:10:59<22:34, 206.53it/s]

finished frames 8321400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1387025/1666666 [2:10:59<22:59, 202.70it/s]

finished frames 8322000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1387130/1666666 [2:11:00<22:37, 205.87it/s]

finished frames 8322600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1387235/1666666 [2:11:00<24:23, 190.96it/s]

finished frames 8323200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1387340/1666666 [2:11:01<22:45, 204.56it/s]

finished frames 8323800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1387425/1666666 [2:11:01<22:25, 207.59it/s]

finished frames 8324400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1387532/1666666 [2:11:02<22:19, 208.36it/s]

finished frames 8325000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1387637/1666666 [2:11:02<22:25, 207.32it/s]

finished frames 8325600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1387742/1666666 [2:11:03<22:29, 206.73it/s]

finished frames 8326200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1387826/1666666 [2:11:03<22:35, 205.75it/s]

finished frames 8326800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1387931/1666666 [2:11:04<22:33, 205.96it/s]

finished frames 8327400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1388036/1666666 [2:11:04<23:00, 201.78it/s]

finished frames 8328000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1388141/1666666 [2:11:05<22:28, 206.57it/s]

finished frames 8328600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1388225/1666666 [2:11:05<22:32, 205.91it/s]

finished frames 8329200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1388330/1666666 [2:11:05<22:28, 206.34it/s]

finished frames 8329800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1388435/1666666 [2:11:06<22:20, 207.63it/s]

finished frames 8330400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1388540/1666666 [2:11:06<22:22, 207.22it/s]

finished frames 8331000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1388624/1666666 [2:11:07<22:24, 206.76it/s]

finished frames 8331600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1388730/1666666 [2:11:07<22:16, 207.89it/s]

finished frames 8332200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1388840/1666666 [2:11:08<22:06, 209.52it/s]

finished frames 8332800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1388925/1666666 [2:11:08<22:16, 207.75it/s]

finished frames 8333400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1389030/1666666 [2:11:09<22:47, 202.96it/s]

finished frames 8334000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1389137/1666666 [2:11:09<22:11, 208.50it/s]

finished frames 8334600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1389222/1666666 [2:11:10<22:09, 208.70it/s]

finished frames 8335200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1389328/1666666 [2:11:10<22:10, 208.44it/s]

finished frames 8335800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1389434/1666666 [2:11:11<22:05, 209.18it/s]

finished frames 8336400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1389540/1666666 [2:11:11<22:06, 208.94it/s]

finished frames 8337000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1389625/1666666 [2:11:12<22:48, 202.46it/s]

finished frames 8337600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1389731/1666666 [2:11:12<23:12, 198.94it/s]

finished frames 8338200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1389837/1666666 [2:11:13<22:17, 206.98it/s]

finished frames 8338800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1389943/1666666 [2:11:13<22:04, 208.97it/s]

finished frames 8339400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1390027/1666666 [2:11:14<22:36, 203.94it/s]

finished frames 8340000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1390133/1666666 [2:11:14<22:14, 207.22it/s]

finished frames 8340600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1390238/1666666 [2:11:15<22:10, 207.81it/s]

finished frames 8341200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1390324/1666666 [2:11:15<21:53, 210.43it/s]

finished frames 8341800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1390434/1666666 [2:11:16<21:46, 211.48it/s]

finished frames 8342400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1390522/1666666 [2:11:16<21:42, 212.03it/s]

finished frames 8343000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1390632/1666666 [2:11:17<21:37, 212.75it/s]

finished frames 8343600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1390742/1666666 [2:11:17<21:38, 212.45it/s]

finished frames 8344200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1390830/1666666 [2:11:17<21:40, 212.04it/s]

finished frames 8344800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1390939/1666666 [2:11:18<21:52, 210.03it/s]

finished frames 8345400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1391024/1666666 [2:11:18<22:32, 203.78it/s]

finished frames 8346000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1391131/1666666 [2:11:19<21:59, 208.75it/s]

finished frames 8346600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1391238/1666666 [2:11:19<21:52, 209.83it/s]

finished frames 8347200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1391322/1666666 [2:11:20<21:59, 208.69it/s]

finished frames 8347800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1391432/1666666 [2:11:20<21:47, 210.51it/s]

finished frames 8348400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1391542/1666666 [2:11:21<21:48, 210.23it/s]

finished frames 8349000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 83%|████████▎ | 1391628/1666666 [2:11:21<21:51, 209.77it/s]

finished frames 8349600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▎ | 1391734/1666666 [2:11:22<21:53, 209.38it/s]

finished frames 8350200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▎ | 1391842/1666666 [2:11:22<21:49, 209.89it/s]

finished frames 8350800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▎ | 1391926/1666666 [2:11:23<23:58, 190.99it/s]

finished frames 8351400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▎ | 1392030/1666666 [2:11:23<24:47, 184.64it/s]

finished frames 8352000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▎ | 1392137/1666666 [2:11:24<22:22, 204.50it/s]

finished frames 8352600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▎ | 1392243/1666666 [2:11:24<21:56, 208.44it/s]

finished frames 8353200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▎ | 1392328/1666666 [2:11:25<21:51, 209.13it/s]

finished frames 8353800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▎ | 1392434/1666666 [2:11:25<21:50, 209.31it/s]

finished frames 8354400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▎ | 1392539/1666666 [2:11:26<21:54, 208.58it/s]

finished frames 8355000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▎ | 1392624/1666666 [2:11:26<21:58, 207.91it/s]

finished frames 8355600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▎ | 1392731/1666666 [2:11:27<21:56, 208.04it/s]

finished frames 8356200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▎ | 1392837/1666666 [2:11:27<21:47, 209.40it/s]

finished frames 8356800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▎ | 1392925/1666666 [2:11:28<21:43, 210.02it/s]

finished frames 8357400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▎ | 1393035/1666666 [2:11:28<22:08, 205.96it/s]

finished frames 8358000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▎ | 1393143/1666666 [2:11:29<21:47, 209.23it/s]

finished frames 8358600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▎ | 1393229/1666666 [2:11:29<21:34, 211.27it/s]

finished frames 8359200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▎ | 1393339/1666666 [2:11:30<21:23, 212.89it/s]

finished frames 8359800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▎ | 1393427/1666666 [2:11:30<21:23, 212.85it/s]

finished frames 8360400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▎ | 1393537/1666666 [2:11:30<21:18, 213.62it/s]

finished frames 8361000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▎ | 1393625/1666666 [2:11:31<21:23, 212.81it/s]

finished frames 8361600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▎ | 1393735/1666666 [2:11:31<21:23, 212.66it/s]

finished frames 8362200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▎ | 1393823/1666666 [2:11:32<21:18, 213.42it/s]

finished frames 8362800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▎ | 1393933/1666666 [2:11:32<21:12, 214.30it/s]

finished frames 8363400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▎ | 1394021/1666666 [2:11:33<22:02, 206.15it/s]

finished frames 8364000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▎ | 1394131/1666666 [2:11:33<21:50, 208.02it/s]

finished frames 8364600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▎ | 1394240/1666666 [2:11:34<21:27, 211.66it/s]

finished frames 8365200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▎ | 1394328/1666666 [2:11:34<21:23, 212.11it/s]

finished frames 8365800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▎ | 1394438/1666666 [2:11:35<22:48, 198.95it/s]

finished frames 8366400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▎ | 1394522/1666666 [2:11:35<22:06, 205.16it/s]

finished frames 8367000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▎ | 1394630/1666666 [2:11:36<21:39, 209.37it/s]

finished frames 8367600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▎ | 1394735/1666666 [2:11:36<21:41, 208.88it/s]

finished frames 8368200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▎ | 1394840/1666666 [2:11:37<21:46, 208.11it/s]

finished frames 8368800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▎ | 1394924/1666666 [2:11:37<21:45, 208.10it/s]

finished frames 8369400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▎ | 1395029/1666666 [2:11:38<22:13, 203.67it/s]

finished frames 8370000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▎ | 1395134/1666666 [2:11:38<21:54, 206.59it/s]

finished frames 8370600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▎ | 1395239/1666666 [2:11:39<21:47, 207.53it/s]

finished frames 8371200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▎ | 1395324/1666666 [2:11:39<21:37, 209.18it/s]

finished frames 8371800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▎ | 1395434/1666666 [2:11:40<21:08, 213.80it/s]

finished frames 8372400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▎ | 1395544/1666666 [2:11:40<20:59, 215.24it/s]

finished frames 8373000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▎ | 1395632/1666666 [2:11:40<20:54, 216.07it/s]

finished frames 8373600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▎ | 1395742/1666666 [2:11:41<20:49, 216.84it/s]

finished frames 8374200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▎ | 1395830/1666666 [2:11:41<20:54, 215.83it/s]

finished frames 8374800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1395940/1666666 [2:11:42<20:49, 216.67it/s]

finished frames 8375400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1396028/1666666 [2:11:42<21:30, 209.77it/s]

finished frames 8376000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1396138/1666666 [2:11:43<20:53, 215.83it/s]

finished frames 8376600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1396226/1666666 [2:11:43<20:57, 214.98it/s]

finished frames 8377200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1396336/1666666 [2:11:44<21:00, 214.46it/s]

finished frames 8377800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1396424/1666666 [2:11:44<21:03, 213.84it/s]

finished frames 8378400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1396534/1666666 [2:11:45<20:52, 215.67it/s]

finished frames 8379000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1396644/1666666 [2:11:45<20:56, 214.96it/s]

finished frames 8379600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1396732/1666666 [2:11:46<21:37, 207.99it/s]

finished frames 8380200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1396842/1666666 [2:11:46<23:03, 195.09it/s]

finished frames 8380800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1396929/1666666 [2:11:47<21:41, 207.23it/s]

finished frames 8381400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1397039/1666666 [2:11:47<22:07, 203.13it/s]

finished frames 8382000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1397127/1666666 [2:11:48<21:17, 210.93it/s]

finished frames 8382600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1397237/1666666 [2:11:48<21:08, 212.44it/s]

finished frames 8383200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1397325/1666666 [2:11:48<21:06, 212.73it/s]

finished frames 8383800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1397435/1666666 [2:11:49<21:02, 213.28it/s]

finished frames 8384400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1397523/1666666 [2:11:49<21:04, 212.87it/s]

finished frames 8385000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1397632/1666666 [2:11:50<21:25, 209.27it/s]

finished frames 8385600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1397737/1666666 [2:11:50<21:35, 207.53it/s]

finished frames 8386200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1397842/1666666 [2:11:51<21:40, 206.72it/s]

finished frames 8386800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1397926/1666666 [2:11:51<21:41, 206.50it/s]

finished frames 8387400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1398031/1666666 [2:11:52<22:13, 201.51it/s]

finished frames 8388000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1398136/1666666 [2:11:52<21:39, 206.58it/s]

finished frames 8388600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1398241/1666666 [2:11:53<21:35, 207.25it/s]

finished frames 8389200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1398325/1666666 [2:11:53<22:17, 200.68it/s]

finished frames 8389800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1398433/1666666 [2:11:54<21:22, 209.15it/s]

finished frames 8390400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1398542/1666666 [2:11:54<21:19, 209.49it/s]

finished frames 8391000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1398628/1666666 [2:11:55<21:38, 206.43it/s]

finished frames 8391600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1398733/1666666 [2:11:55<21:45, 205.19it/s]

finished frames 8392200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1398842/1666666 [2:11:56<21:15, 209.94it/s]

finished frames 8392800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1398930/1666666 [2:11:56<21:02, 212.00it/s]

finished frames 8393400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1399040/1666666 [2:11:57<21:29, 207.60it/s]

finished frames 8394000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1399127/1666666 [2:11:57<21:09, 210.70it/s]

finished frames 8394600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1399237/1666666 [2:11:58<21:01, 211.93it/s]

finished frames 8395200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1399325/1666666 [2:11:58<21:16, 209.49it/s]

finished frames 8395800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1399434/1666666 [2:11:59<21:15, 209.47it/s]

finished frames 8396400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1399539/1666666 [2:11:59<21:35, 206.17it/s]

finished frames 8397000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1399625/1666666 [2:12:00<21:20, 208.60it/s]

finished frames 8397600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1399735/1666666 [2:12:00<21:00, 211.82it/s]

finished frames 8398200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1399823/1666666 [2:12:00<20:53, 212.88it/s]

finished frames 8398800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1399932/1666666 [2:12:01<21:14, 209.29it/s]

finished frames 8399400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1400039/1666666 [2:12:02<21:30, 206.58it/s]

finished frames 8400000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1400127/1666666 [2:12:02<21:07, 210.37it/s]

finished frames 8400600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1400236/1666666 [2:12:02<21:06, 210.33it/s]

finished frames 8401200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1400323/1666666 [2:12:03<21:20, 207.98it/s]

finished frames 8401800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1400433/1666666 [2:12:03<21:06, 210.25it/s]

finished frames 8402400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1400543/1666666 [2:12:04<21:03, 210.70it/s]

finished frames 8403000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1400631/1666666 [2:12:04<21:02, 210.74it/s]

finished frames 8403600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1400740/1666666 [2:12:05<20:58, 211.27it/s]

finished frames 8404200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1400828/1666666 [2:12:05<21:01, 210.72it/s]

finished frames 8404800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1400938/1666666 [2:12:06<20:57, 211.29it/s]

finished frames 8405400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1401026/1666666 [2:12:06<21:23, 207.00it/s]

finished frames 8406000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1401136/1666666 [2:12:07<20:48, 212.76it/s]

finished frames 8406600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1401224/1666666 [2:12:07<20:45, 213.17it/s]

finished frames 8407200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1401334/1666666 [2:12:08<20:41, 213.69it/s]

finished frames 8407800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1401444/1666666 [2:12:08<21:15, 207.97it/s]

finished frames 8408400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1401532/1666666 [2:12:09<22:01, 200.60it/s]

finished frames 8409000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1401642/1666666 [2:12:09<20:52, 211.66it/s]

finished frames 8409600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1401730/1666666 [2:12:10<20:44, 212.87it/s]

finished frames 8410200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1401840/1666666 [2:12:10<20:34, 214.47it/s]

finished frames 8410800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1401928/1666666 [2:12:10<20:36, 214.13it/s]

finished frames 8411400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1402038/1666666 [2:12:11<21:05, 209.19it/s]

finished frames 8412000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1402126/1666666 [2:12:11<20:45, 212.43it/s]

finished frames 8412600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1402236/1666666 [2:12:12<20:35, 214.00it/s]

finished frames 8413200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1402324/1666666 [2:12:12<20:38, 213.46it/s]

finished frames 8413800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1402434/1666666 [2:12:13<20:37, 213.44it/s]

finished frames 8414400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1402544/1666666 [2:12:13<20:37, 213.36it/s]

finished frames 8415000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1402632/1666666 [2:12:14<20:40, 212.93it/s]

finished frames 8415600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1402742/1666666 [2:12:14<20:38, 213.02it/s]

finished frames 8416200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1402830/1666666 [2:12:15<20:40, 212.68it/s]

finished frames 8416800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1402940/1666666 [2:12:15<20:35, 213.51it/s]

finished frames 8417400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1403028/1666666 [2:12:16<21:05, 208.33it/s]

finished frames 8418000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1403138/1666666 [2:12:16<20:41, 212.26it/s]

finished frames 8418600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1403226/1666666 [2:12:17<20:38, 212.67it/s]

finished frames 8419200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1403336/1666666 [2:12:17<20:32, 213.57it/s]

finished frames 8419800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1403424/1666666 [2:12:18<20:35, 213.09it/s]

finished frames 8420400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1403534/1666666 [2:12:18<20:32, 213.53it/s]

finished frames 8421000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1403644/1666666 [2:12:19<20:32, 213.44it/s]

finished frames 8421600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1403732/1666666 [2:12:19<20:35, 212.82it/s]

finished frames 8422200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1403842/1666666 [2:12:20<21:05, 207.74it/s]

finished frames 8422800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1403930/1666666 [2:12:20<20:44, 211.09it/s]

finished frames 8423400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1404040/1666666 [2:12:20<20:59, 208.56it/s]

finished frames 8424000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1404128/1666666 [2:12:21<20:44, 211.04it/s]

finished frames 8424600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1404238/1666666 [2:12:21<20:35, 212.37it/s]

finished frames 8425200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1404326/1666666 [2:12:22<20:36, 212.22it/s]

finished frames 8425800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1404436/1666666 [2:12:22<20:48, 209.98it/s]

finished frames 8426400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1404522/1666666 [2:12:23<20:52, 209.25it/s]

finished frames 8427000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1404629/1666666 [2:12:23<21:03, 207.39it/s]

finished frames 8427600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1404734/1666666 [2:12:24<21:06, 206.74it/s]

finished frames 8428200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1404839/1666666 [2:12:24<21:11, 205.96it/s]

finished frames 8428800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1404923/1666666 [2:12:25<21:11, 205.85it/s]

finished frames 8429400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1405028/1666666 [2:12:25<21:31, 202.53it/s]

finished frames 8430000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1405134/1666666 [2:12:26<20:55, 208.37it/s]

finished frames 8430600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1405244/1666666 [2:12:26<20:29, 212.57it/s]

finished frames 8431200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1405332/1666666 [2:12:27<20:22, 213.77it/s]

finished frames 8431800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1405442/1666666 [2:12:27<20:11, 215.61it/s]

finished frames 8432400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1405530/1666666 [2:12:28<20:12, 215.42it/s]

finished frames 8433000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1405640/1666666 [2:12:28<20:10, 215.64it/s]

finished frames 8433600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1405728/1666666 [2:12:28<20:16, 214.57it/s]

finished frames 8434200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1405838/1666666 [2:12:29<20:12, 215.17it/s]

finished frames 8434800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1405926/1666666 [2:12:29<20:16, 214.39it/s]

finished frames 8435400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1406036/1666666 [2:12:30<20:41, 209.90it/s]

finished frames 8436000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1406124/1666666 [2:12:30<20:17, 213.97it/s]

finished frames 8436600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1406234/1666666 [2:12:31<20:36, 210.63it/s]

finished frames 8437200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1406344/1666666 [2:12:31<20:12, 214.74it/s]

finished frames 8437800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1406432/1666666 [2:12:32<20:15, 214.10it/s]

finished frames 8438400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1406542/1666666 [2:12:32<20:08, 215.17it/s]

finished frames 8439000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1406630/1666666 [2:12:33<20:07, 215.42it/s]

finished frames 8439600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1406740/1666666 [2:12:33<20:02, 216.09it/s]

finished frames 8440200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1406828/1666666 [2:12:34<20:10, 214.70it/s]

finished frames 8440800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1406938/1666666 [2:12:34<20:06, 215.26it/s]

finished frames 8441400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1407026/1666666 [2:12:35<20:36, 209.90it/s]

finished frames 8442000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1407136/1666666 [2:12:35<20:10, 214.48it/s]

finished frames 8442600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1407224/1666666 [2:12:35<20:07, 214.93it/s]

finished frames 8443200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1407334/1666666 [2:12:36<19:58, 216.46it/s]

finished frames 8443800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1407444/1666666 [2:12:37<20:03, 215.35it/s]

finished frames 8444400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1407532/1666666 [2:12:37<20:06, 214.74it/s]

finished frames 8445000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1407642/1666666 [2:12:37<20:04, 215.13it/s]

finished frames 8445600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1407730/1666666 [2:12:38<20:04, 214.94it/s]

finished frames 8446200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1407840/1666666 [2:12:38<20:05, 214.66it/s]

finished frames 8446800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1407928/1666666 [2:12:39<20:05, 214.57it/s]

finished frames 8447400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1408038/1666666 [2:12:39<20:32, 209.82it/s]

finished frames 8448000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1408126/1666666 [2:12:40<20:09, 213.69it/s]

finished frames 8448600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 84%|████████▍ | 1408236/1666666 [2:12:40<20:21, 211.49it/s]

finished frames 8449200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1408343/1666666 [2:12:41<20:35, 209.11it/s]

finished frames 8449800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1408427/1666666 [2:12:41<20:45, 207.32it/s]

finished frames 8450400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1408533/1666666 [2:12:42<20:39, 208.19it/s]

finished frames 8451000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1408640/1666666 [2:12:42<21:00, 204.77it/s]

finished frames 8451600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1408724/1666666 [2:12:43<21:27, 200.41it/s]

finished frames 8452200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1408832/1666666 [2:12:43<20:33, 208.96it/s]

finished frames 8452800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1408938/1666666 [2:12:44<20:35, 208.57it/s]

finished frames 8453400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1409022/1666666 [2:12:44<21:13, 202.36it/s]

finished frames 8454000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1409127/1666666 [2:12:45<20:51, 205.85it/s]

finished frames 8454600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1409233/1666666 [2:12:45<20:35, 208.44it/s]

finished frames 8455200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1409343/1666666 [2:12:46<20:13, 212.06it/s]

finished frames 8455800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1409431/1666666 [2:12:46<20:06, 213.27it/s]

finished frames 8456400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1409541/1666666 [2:12:47<19:56, 214.85it/s]

finished frames 8457000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1409629/1666666 [2:12:47<19:56, 214.78it/s]

finished frames 8457600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1409739/1666666 [2:12:47<19:53, 215.31it/s]

finished frames 8458200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1409827/1666666 [2:12:48<19:57, 214.40it/s]

finished frames 8458800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1409937/1666666 [2:12:48<19:59, 214.12it/s]

finished frames 8459400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1410025/1666666 [2:12:49<20:21, 210.07it/s]

finished frames 8460000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1410135/1666666 [2:12:49<20:07, 212.50it/s]

finished frames 8460600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1410223/1666666 [2:12:50<20:10, 211.83it/s]

finished frames 8461200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1410333/1666666 [2:12:50<20:00, 213.52it/s]

finished frames 8461800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1410443/1666666 [2:12:51<19:57, 213.91it/s]

finished frames 8462400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1410531/1666666 [2:12:51<20:03, 212.85it/s]

finished frames 8463000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1410641/1666666 [2:12:52<20:03, 212.75it/s]

finished frames 8463600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1410729/1666666 [2:12:52<20:00, 213.19it/s]

finished frames 8464200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1410839/1666666 [2:12:53<19:59, 213.22it/s]

finished frames 8464800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1410927/1666666 [2:12:53<19:57, 213.55it/s]

finished frames 8465400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1411015/1666666 [2:12:53<20:34, 207.14it/s]

finished frames 8466000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1411144/1666666 [2:12:54<20:25, 208.59it/s]

finished frames 8466600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1411232/1666666 [2:12:55<20:05, 211.95it/s]

finished frames 8467200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1411342/1666666 [2:12:55<20:07, 211.49it/s]

finished frames 8467800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1411430/1666666 [2:12:55<20:08, 211.15it/s]

finished frames 8468400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1411540/1666666 [2:12:56<20:04, 211.80it/s]

finished frames 8469000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1411628/1666666 [2:12:56<20:06, 211.37it/s]

finished frames 8469600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1411738/1666666 [2:12:57<20:02, 211.94it/s]

finished frames 8470200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1411826/1666666 [2:12:57<19:58, 212.66it/s]

finished frames 8470800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1411936/1666666 [2:12:58<20:07, 210.94it/s]

finished frames 8471400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1412023/1666666 [2:12:58<20:36, 205.98it/s]

finished frames 8472000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1412128/1666666 [2:12:59<20:25, 207.67it/s]

finished frames 8472600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1412234/1666666 [2:12:59<20:19, 208.66it/s]

finished frames 8473200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1412340/1666666 [2:13:00<20:15, 209.29it/s]

finished frames 8473800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1412427/1666666 [2:13:00<20:14, 209.29it/s]

finished frames 8474400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1412532/1666666 [2:13:01<20:20, 208.29it/s]

finished frames 8475000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1412639/1666666 [2:13:01<20:14, 209.15it/s]

finished frames 8475600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1412723/1666666 [2:13:02<20:22, 207.77it/s]

finished frames 8476200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1412828/1666666 [2:13:02<20:14, 209.06it/s]

finished frames 8476800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1412934/1666666 [2:13:03<20:19, 208.00it/s]

finished frames 8477400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1413041/1666666 [2:13:03<20:31, 205.91it/s]

finished frames 8478000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1413129/1666666 [2:13:04<20:05, 210.34it/s]

finished frames 8478600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1413239/1666666 [2:13:04<19:57, 211.63it/s]

finished frames 8479200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1413326/1666666 [2:13:05<20:50, 202.52it/s]

finished frames 8479800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1413434/1666666 [2:13:05<21:30, 196.18it/s]

finished frames 8480400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1413541/1666666 [2:13:06<20:21, 207.14it/s]

finished frames 8481000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1413626/1666666 [2:13:06<20:14, 208.29it/s]

finished frames 8481600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1413731/1666666 [2:13:06<20:21, 207.02it/s]

finished frames 8482200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1413836/1666666 [2:13:07<20:18, 207.56it/s]

finished frames 8482800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1413941/1666666 [2:13:07<20:19, 207.32it/s]

finished frames 8483400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1414025/1666666 [2:13:08<20:51, 201.90it/s]

finished frames 8484000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1414130/1666666 [2:13:08<20:32, 204.91it/s]

finished frames 8484600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1414236/1666666 [2:13:09<20:21, 206.74it/s]

finished frames 8485200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1414342/1666666 [2:13:09<20:15, 207.66it/s]

finished frames 8485800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1414426/1666666 [2:13:10<20:19, 206.91it/s]

finished frames 8486400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1414532/1666666 [2:13:10<20:10, 208.25it/s]

finished frames 8487000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1414639/1666666 [2:13:11<20:09, 208.29it/s]

finished frames 8487600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1414723/1666666 [2:13:11<20:13, 207.63it/s]

finished frames 8488200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1414831/1666666 [2:13:12<19:59, 210.00it/s]

finished frames 8488800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1414939/1666666 [2:13:12<19:52, 211.01it/s]

finished frames 8489400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1415027/1666666 [2:13:13<20:11, 207.67it/s]

finished frames 8490000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1415137/1666666 [2:13:13<19:46, 211.91it/s]

finished frames 8490600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1415225/1666666 [2:13:14<19:42, 212.58it/s]

finished frames 8491200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1415335/1666666 [2:13:14<19:41, 212.71it/s]

finished frames 8491800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1415423/1666666 [2:13:15<19:41, 212.71it/s]

finished frames 8492400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1415533/1666666 [2:13:15<19:33, 213.91it/s]

finished frames 8493000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1415621/1666666 [2:13:16<22:59, 181.99it/s]

finished frames 8493600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1415729/1666666 [2:13:16<21:59, 190.13it/s]

finished frames 8494200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1415834/1666666 [2:13:17<20:26, 204.46it/s]

finished frames 8494800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1415940/1666666 [2:13:17<20:05, 208.04it/s]

finished frames 8495400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1416024/1666666 [2:13:18<20:34, 203.07it/s]

finished frames 8496000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1416129/1666666 [2:13:18<20:07, 207.51it/s]

finished frames 8496600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1416234/1666666 [2:13:19<20:06, 207.59it/s]

finished frames 8497200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1416339/1666666 [2:13:19<20:05, 207.57it/s]

finished frames 8497800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1416423/1666666 [2:13:19<20:08, 207.05it/s]

finished frames 8498400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1416528/1666666 [2:13:20<20:11, 206.47it/s]

finished frames 8499000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▍ | 1416633/1666666 [2:13:20<20:04, 207.60it/s]

finished frames 8499600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1416739/1666666 [2:13:21<19:57, 208.66it/s]

finished frames 8500200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1416824/1666666 [2:13:21<19:56, 208.89it/s]

finished frames 8500800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1416929/1666666 [2:13:22<19:58, 208.29it/s]

finished frames 8501400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1417034/1666666 [2:13:22<20:21, 204.39it/s]

finished frames 8502000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1417141/1666666 [2:13:23<19:46, 210.22it/s]

finished frames 8502600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1417226/1666666 [2:13:23<19:57, 208.27it/s]

finished frames 8503200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1417332/1666666 [2:13:24<19:55, 208.56it/s]

finished frames 8503800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1417437/1666666 [2:13:24<19:56, 208.32it/s]

finished frames 8504400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1417522/1666666 [2:13:25<19:55, 208.48it/s]

finished frames 8505000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1417630/1666666 [2:13:25<19:47, 209.76it/s]

finished frames 8505600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1417737/1666666 [2:13:26<19:50, 209.17it/s]

finished frames 8506200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1417842/1666666 [2:13:26<19:51, 208.83it/s]

finished frames 8506800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1417927/1666666 [2:13:27<19:54, 208.32it/s]

finished frames 8507400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1418032/1666666 [2:13:27<20:45, 199.58it/s]

finished frames 8508000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1418141/1666666 [2:13:28<19:28, 212.73it/s]

finished frames 8508600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1418229/1666666 [2:13:28<19:22, 213.64it/s]

finished frames 8509200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1418339/1666666 [2:13:29<19:24, 213.28it/s]

finished frames 8509800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1418427/1666666 [2:13:29<19:29, 212.21it/s]

finished frames 8510400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1418537/1666666 [2:13:30<19:26, 212.71it/s]

finished frames 8511000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1418625/1666666 [2:13:30<19:26, 212.55it/s]

finished frames 8511600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1418735/1666666 [2:13:31<19:24, 212.91it/s]

finished frames 8512200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1418823/1666666 [2:13:31<19:28, 212.11it/s]

finished frames 8512800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1418933/1666666 [2:13:31<19:23, 212.90it/s]

finished frames 8513400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1419021/1666666 [2:13:32<20:02, 205.87it/s]

finished frames 8514000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1419130/1666666 [2:13:32<19:42, 209.32it/s]

finished frames 8514600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1419235/1666666 [2:13:33<19:43, 209.01it/s]

finished frames 8515200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1419341/1666666 [2:13:33<19:42, 209.17it/s]

finished frames 8515800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1419426/1666666 [2:13:34<19:45, 208.53it/s]

finished frames 8516400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1419532/1666666 [2:13:34<19:46, 208.32it/s]

finished frames 8517000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1419638/1666666 [2:13:35<19:43, 208.68it/s]

finished frames 8517600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1419722/1666666 [2:13:35<19:49, 207.63it/s]

finished frames 8518200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1419831/1666666 [2:13:36<19:32, 210.44it/s]

finished frames 8518800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1419937/1666666 [2:13:36<19:49, 207.35it/s]

finished frames 8519400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1420021/1666666 [2:13:37<20:29, 200.62it/s]

finished frames 8520000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1420126/1666666 [2:13:37<20:03, 204.87it/s]

finished frames 8520600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1420231/1666666 [2:13:38<19:58, 205.67it/s]

finished frames 8521200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1420336/1666666 [2:13:38<21:36, 189.96it/s]

finished frames 8521800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1420420/1666666 [2:13:39<22:06, 185.57it/s]

finished frames 8522400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1420525/1666666 [2:13:39<20:16, 202.30it/s]

finished frames 8523000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1420630/1666666 [2:13:40<19:56, 205.56it/s]

finished frames 8523600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1420735/1666666 [2:13:40<19:52, 206.25it/s]

finished frames 8524200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1420840/1666666 [2:13:41<19:53, 205.94it/s]

finished frames 8524800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1420924/1666666 [2:13:41<19:54, 205.79it/s]

finished frames 8525400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1421029/1666666 [2:13:42<20:12, 202.53it/s]

finished frames 8526000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1421134/1666666 [2:13:42<19:46, 206.88it/s]

finished frames 8526600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1421242/1666666 [2:13:43<19:25, 210.64it/s]

finished frames 8527200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1421330/1666666 [2:13:43<19:22, 211.12it/s]

finished frames 8527800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1421440/1666666 [2:13:44<19:16, 212.08it/s]

finished frames 8528400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1421528/1666666 [2:13:44<19:16, 211.89it/s]

finished frames 8529000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1421638/1666666 [2:13:45<19:09, 213.22it/s]

finished frames 8529600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1421726/1666666 [2:13:45<19:11, 212.65it/s]

finished frames 8530200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1421836/1666666 [2:13:46<19:14, 212.11it/s]

finished frames 8530800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1421924/1666666 [2:13:46<19:12, 212.43it/s]

finished frames 8531400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1422034/1666666 [2:13:46<19:37, 207.73it/s]

finished frames 8532000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1422143/1666666 [2:13:47<19:15, 211.54it/s]

finished frames 8532600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1422231/1666666 [2:13:47<19:09, 212.64it/s]

finished frames 8533200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1422341/1666666 [2:13:48<18:57, 214.87it/s]

finished frames 8533800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1422429/1666666 [2:13:48<19:05, 213.23it/s]

finished frames 8534400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1422539/1666666 [2:13:49<18:54, 215.19it/s]

finished frames 8535000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1422627/1666666 [2:13:49<18:54, 215.18it/s]

finished frames 8535600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1422737/1666666 [2:13:50<18:57, 214.42it/s]

finished frames 8536200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1422825/1666666 [2:13:50<18:57, 214.37it/s]

finished frames 8536800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1422935/1666666 [2:13:51<18:56, 214.41it/s]

finished frames 8537400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1423022/1666666 [2:13:51<19:26, 208.88it/s]

finished frames 8538000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1423132/1666666 [2:13:52<19:12, 211.26it/s]

finished frames 8538600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1423242/1666666 [2:13:52<19:02, 213.13it/s]

finished frames 8539200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1423330/1666666 [2:13:53<19:08, 211.86it/s]

finished frames 8539800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1423440/1666666 [2:13:53<19:11, 211.30it/s]

finished frames 8540400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1423525/1666666 [2:13:53<19:36, 206.63it/s]

finished frames 8541000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1423634/1666666 [2:13:54<19:28, 207.92it/s]

finished frames 8541600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1423722/1666666 [2:13:54<19:13, 210.69it/s]

finished frames 8542200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1423828/1666666 [2:13:55<19:36, 206.41it/s]

finished frames 8542800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1423938/1666666 [2:13:55<18:51, 214.49it/s]

finished frames 8543400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1424027/1666666 [2:13:56<19:00, 212.70it/s]

finished frames 8544000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1424141/1666666 [2:13:56<18:27, 219.08it/s]

finished frames 8544600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1424232/1666666 [2:13:57<18:22, 219.93it/s]

finished frames 8545200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1424323/1666666 [2:13:57<18:20, 220.27it/s]

finished frames 8545800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1424434/1666666 [2:13:58<18:30, 218.04it/s]

finished frames 8546400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1424522/1666666 [2:13:58<18:55, 213.31it/s]

finished frames 8547000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1424633/1666666 [2:13:59<18:44, 215.27it/s]

finished frames 8547600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1424743/1666666 [2:13:59<19:06, 211.08it/s]

finished frames 8548200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1424832/1666666 [2:14:00<18:36, 216.64it/s]

finished frames 8548800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 85%|████████▌ | 1424923/1666666 [2:14:00<18:17, 220.27it/s]

finished frames 8549400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1425038/1666666 [2:14:01<18:33, 216.99it/s]

finished frames 8550000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1425126/1666666 [2:14:01<18:57, 212.30it/s]

finished frames 8550600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1425235/1666666 [2:14:01<19:12, 209.46it/s]

finished frames 8551200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1425323/1666666 [2:14:02<19:03, 211.08it/s]

finished frames 8551800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1425432/1666666 [2:14:02<19:04, 210.79it/s]

finished frames 8552400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1425542/1666666 [2:14:03<19:02, 211.12it/s]

finished frames 8553000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1425630/1666666 [2:14:03<19:08, 209.81it/s]

finished frames 8553600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1425739/1666666 [2:14:04<18:58, 211.61it/s]

finished frames 8554200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1425827/1666666 [2:14:04<18:53, 212.49it/s]

finished frames 8554800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1425937/1666666 [2:14:05<18:50, 212.92it/s]

finished frames 8555400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1426024/1666666 [2:14:05<19:17, 207.88it/s]

finished frames 8556000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1426134/1666666 [2:14:06<19:00, 210.99it/s]

finished frames 8556600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1426244/1666666 [2:14:06<18:50, 212.60it/s]

finished frames 8557200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1426332/1666666 [2:14:07<18:51, 212.44it/s]

finished frames 8557800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1426442/1666666 [2:14:07<18:57, 211.23it/s]

finished frames 8558400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1426527/1666666 [2:14:08<19:21, 206.77it/s]

finished frames 8559000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1426632/1666666 [2:14:08<19:23, 206.38it/s]

finished frames 8559600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1426737/1666666 [2:14:09<19:23, 206.13it/s]

finished frames 8560200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1426842/1666666 [2:14:09<19:24, 206.01it/s]

finished frames 8560800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1426926/1666666 [2:14:10<19:26, 205.57it/s]

finished frames 8561400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1427032/1666666 [2:14:10<19:39, 203.17it/s]

finished frames 8562000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1427141/1666666 [2:14:11<18:52, 211.56it/s]

finished frames 8562600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1427229/1666666 [2:14:11<18:49, 212.07it/s]

finished frames 8563200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1427339/1666666 [2:14:12<18:54, 211.02it/s]

finished frames 8563800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1427426/1666666 [2:14:12<22:20, 178.48it/s]

finished frames 8564400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1427532/1666666 [2:14:12<19:44, 201.94it/s]

finished frames 8565000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1427637/1666666 [2:14:13<19:33, 203.68it/s]

finished frames 8565600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1427721/1666666 [2:14:13<19:49, 200.82it/s]

finished frames 8566200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1427826/1666666 [2:14:14<19:39, 202.42it/s]

finished frames 8566800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1427931/1666666 [2:14:14<19:45, 201.32it/s]

finished frames 8567400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1428036/1666666 [2:14:15<20:18, 195.89it/s]

finished frames 8568000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1428138/1666666 [2:14:16<19:57, 199.11it/s]

finished frames 8568600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1428239/1666666 [2:14:16<19:52, 200.00it/s]

finished frames 8569200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1428323/1666666 [2:14:16<19:50, 200.15it/s]

finished frames 8569800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1428428/1666666 [2:14:17<19:37, 202.35it/s]

finished frames 8570400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1428533/1666666 [2:14:17<19:42, 201.35it/s]

finished frames 8571000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1428638/1666666 [2:14:18<19:44, 200.97it/s]

finished frames 8571600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1428722/1666666 [2:14:18<19:50, 199.87it/s]

finished frames 8572200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1428827/1666666 [2:14:19<19:48, 200.12it/s]

finished frames 8572800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1428930/1666666 [2:14:19<19:47, 200.27it/s]

finished frames 8573400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1429033/1666666 [2:14:20<20:14, 195.70it/s]

finished frames 8574000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1429134/1666666 [2:14:21<19:55, 198.71it/s]

finished frames 8574600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1429234/1666666 [2:14:21<19:53, 198.94it/s]

finished frames 8575200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1429336/1666666 [2:14:22<19:48, 199.76it/s]

finished frames 8575800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1429438/1666666 [2:14:22<19:42, 200.54it/s]

finished frames 8576400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1429541/1666666 [2:14:23<19:47, 199.73it/s]

finished frames 8577000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1429625/1666666 [2:14:23<19:45, 200.00it/s]

finished frames 8577600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1429727/1666666 [2:14:24<20:23, 193.70it/s]

finished frames 8578200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1429829/1666666 [2:14:24<20:41, 190.71it/s]

finished frames 8578800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1429931/1666666 [2:14:25<19:50, 198.88it/s]

finished frames 8579400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1430032/1666666 [2:14:25<20:14, 194.76it/s]

finished frames 8580000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1430133/1666666 [2:14:26<19:44, 199.65it/s]

finished frames 8580600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1430234/1666666 [2:14:26<19:36, 200.94it/s]

finished frames 8581200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1430342/1666666 [2:14:27<18:51, 208.79it/s]

finished frames 8581800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1430430/1666666 [2:14:27<18:40, 210.92it/s]

finished frames 8582400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1430540/1666666 [2:14:28<18:31, 212.46it/s]

finished frames 8583000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1430628/1666666 [2:14:28<18:35, 211.56it/s]

finished frames 8583600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1430737/1666666 [2:14:28<18:51, 208.43it/s]

finished frames 8584200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1430842/1666666 [2:14:29<19:00, 206.84it/s]

finished frames 8584800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1430927/1666666 [2:14:29<18:54, 207.71it/s]

finished frames 8585400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1431036/1666666 [2:14:30<18:56, 207.42it/s]

finished frames 8586000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1431124/1666666 [2:14:30<18:39, 210.45it/s]

finished frames 8586600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1431234/1666666 [2:14:31<18:28, 212.40it/s]

finished frames 8587200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1431344/1666666 [2:14:31<18:24, 213.05it/s]

finished frames 8587800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1431432/1666666 [2:14:32<18:26, 212.60it/s]

finished frames 8588400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1431542/1666666 [2:14:32<18:23, 213.06it/s]

finished frames 8589000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1431630/1666666 [2:14:33<18:25, 212.59it/s]

finished frames 8589600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1431740/1666666 [2:14:33<18:21, 213.25it/s]

finished frames 8590200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1431828/1666666 [2:14:34<18:22, 213.09it/s]

finished frames 8590800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1431938/1666666 [2:14:34<18:21, 213.03it/s]

finished frames 8591400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1432026/1666666 [2:14:35<20:46, 188.30it/s]

finished frames 8592000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1432135/1666666 [2:14:35<20:27, 191.05it/s]

finished frames 8592600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1432222/1666666 [2:14:36<18:55, 206.38it/s]

finished frames 8593200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1432332/1666666 [2:14:36<18:19, 213.09it/s]

finished frames 8593800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1432442/1666666 [2:14:37<18:15, 213.79it/s]

finished frames 8594400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1432530/1666666 [2:14:37<18:21, 212.57it/s]

finished frames 8595000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1432640/1666666 [2:14:38<18:19, 212.77it/s]

finished frames 8595600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1432728/1666666 [2:14:38<18:23, 212.04it/s]

finished frames 8596200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1432838/1666666 [2:14:38<18:19, 212.75it/s]

finished frames 8596800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1432926/1666666 [2:14:39<18:30, 210.43it/s]

finished frames 8597400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1433036/1666666 [2:14:39<18:42, 208.13it/s]

finished frames 8598000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1433124/1666666 [2:14:40<18:25, 211.17it/s]

finished frames 8598600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1433234/1666666 [2:14:40<18:15, 213.03it/s]

finished frames 8599200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1433322/1666666 [2:14:41<18:21, 211.85it/s]

finished frames 8599800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1433432/1666666 [2:14:41<18:30, 210.01it/s]

finished frames 8600400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1433538/1666666 [2:14:42<18:40, 208.10it/s]

finished frames 8601000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1433622/1666666 [2:14:42<18:51, 205.93it/s]

finished frames 8601600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1433727/1666666 [2:14:43<18:41, 207.73it/s]

finished frames 8602200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1433832/1666666 [2:14:43<18:47, 206.57it/s]

finished frames 8602800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1433938/1666666 [2:14:44<18:35, 208.58it/s]

finished frames 8603400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1434025/1666666 [2:14:44<18:51, 205.56it/s]

finished frames 8604000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1434135/1666666 [2:14:45<18:18, 211.68it/s]

finished frames 8604600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1434223/1666666 [2:14:45<18:16, 211.99it/s]

finished frames 8605200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1434333/1666666 [2:14:46<18:10, 213.06it/s]

finished frames 8605800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1434442/1666666 [2:14:46<18:46, 206.15it/s]

finished frames 8606400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1434530/1666666 [2:14:47<18:27, 209.61it/s]

finished frames 8607000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1434635/1666666 [2:14:47<18:56, 204.12it/s]

finished frames 8607600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1434743/1666666 [2:14:48<18:26, 209.59it/s]

finished frames 8608200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1434828/1666666 [2:14:48<18:28, 209.11it/s]

finished frames 8608800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1434935/1666666 [2:14:49<18:35, 207.77it/s]

finished frames 8609400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1435020/1666666 [2:14:49<19:02, 202.80it/s]

finished frames 8610000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1435126/1666666 [2:14:49<18:56, 203.67it/s]

finished frames 8610600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1435231/1666666 [2:14:50<18:47, 205.20it/s]

finished frames 8611200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1435336/1666666 [2:14:50<18:43, 205.96it/s]

finished frames 8611800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1435441/1666666 [2:14:51<18:44, 205.55it/s]

finished frames 8612400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1435525/1666666 [2:14:51<18:44, 205.58it/s]

finished frames 8613000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1435630/1666666 [2:14:52<18:40, 206.21it/s]

finished frames 8613600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1435736/1666666 [2:14:52<18:27, 208.54it/s]

finished frames 8614200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1435842/1666666 [2:14:53<18:25, 208.83it/s]

finished frames 8614800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1435927/1666666 [2:14:53<18:22, 209.20it/s]

finished frames 8615400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1436032/1666666 [2:14:54<18:54, 203.26it/s]

finished frames 8616000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1436138/1666666 [2:14:54<18:26, 208.40it/s]

finished frames 8616600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1436223/1666666 [2:14:55<18:24, 208.57it/s]

finished frames 8617200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1436330/1666666 [2:14:55<18:25, 208.40it/s]

finished frames 8617800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1436436/1666666 [2:14:56<18:24, 208.51it/s]

finished frames 8618400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1436543/1666666 [2:14:56<18:22, 208.67it/s]

finished frames 8619000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1436629/1666666 [2:14:57<18:24, 208.29it/s]

finished frames 8619600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1436738/1666666 [2:14:57<19:12, 199.54it/s]

finished frames 8620200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1436825/1666666 [2:14:58<21:33, 177.74it/s]

finished frames 8620800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1436934/1666666 [2:14:58<18:36, 205.68it/s]

finished frames 8621400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1437041/1666666 [2:14:59<18:34, 206.12it/s]

finished frames 8622000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1437128/1666666 [2:14:59<18:13, 209.97it/s]

finished frames 8622600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1437238/1666666 [2:15:00<18:08, 210.87it/s]

finished frames 8623200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1437326/1666666 [2:15:00<18:13, 209.78it/s]

finished frames 8623800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▌ | 1437436/1666666 [2:15:01<18:02, 211.69it/s]

finished frames 8624400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▋ | 1437524/1666666 [2:15:01<18:02, 211.70it/s]

finished frames 8625000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▋ | 1437634/1666666 [2:15:02<17:51, 213.83it/s]

finished frames 8625600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▋ | 1437722/1666666 [2:15:02<17:59, 212.08it/s]

finished frames 8626200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▋ | 1437832/1666666 [2:15:02<17:49, 213.90it/s]

finished frames 8626800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▋ | 1437942/1666666 [2:15:03<17:56, 212.53it/s]

finished frames 8627400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▋ | 1438029/1666666 [2:15:03<18:31, 205.64it/s]

finished frames 8628000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▋ | 1438139/1666666 [2:15:04<18:02, 211.15it/s]

finished frames 8628600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▋ | 1438227/1666666 [2:15:04<18:00, 211.41it/s]

finished frames 8629200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▋ | 1438337/1666666 [2:15:05<17:55, 212.27it/s]

finished frames 8629800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▋ | 1438425/1666666 [2:15:05<17:57, 211.79it/s]

finished frames 8630400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▋ | 1438535/1666666 [2:15:06<17:56, 211.91it/s]

finished frames 8631000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▋ | 1438623/1666666 [2:15:06<17:56, 211.82it/s]

finished frames 8631600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▋ | 1438733/1666666 [2:15:07<17:54, 212.07it/s]

finished frames 8632200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▋ | 1438843/1666666 [2:15:07<17:51, 212.52it/s]

finished frames 8632800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▋ | 1438931/1666666 [2:15:08<17:53, 212.08it/s]

finished frames 8633400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▋ | 1439041/1666666 [2:15:08<18:16, 207.62it/s]

finished frames 8634000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▋ | 1439127/1666666 [2:15:09<18:52, 200.84it/s]

finished frames 8634600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▋ | 1439235/1666666 [2:15:09<19:00, 199.47it/s]

finished frames 8635200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▋ | 1439322/1666666 [2:15:10<18:13, 207.89it/s]

finished frames 8635800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▋ | 1439432/1666666 [2:15:10<17:38, 214.62it/s]

finished frames 8636400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▋ | 1439542/1666666 [2:15:11<17:38, 214.64it/s]

finished frames 8637000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▋ | 1439630/1666666 [2:15:11<17:44, 213.37it/s]

finished frames 8637600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▋ | 1439740/1666666 [2:15:12<17:40, 213.98it/s]

finished frames 8638200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▋ | 1439828/1666666 [2:15:12<17:45, 212.94it/s]

finished frames 8638800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▋ | 1439937/1666666 [2:15:12<18:00, 209.86it/s]

finished frames 8639400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▋ | 1440021/1666666 [2:15:13<18:48, 200.84it/s]

finished frames 8640000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▋ | 1440128/1666666 [2:15:13<18:07, 208.32it/s]

finished frames 8640600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▋ | 1440234/1666666 [2:15:14<18:02, 209.16it/s]

finished frames 8641200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▋ | 1440339/1666666 [2:15:14<18:03, 208.96it/s]

finished frames 8641800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▋ | 1440423/1666666 [2:15:15<18:08, 207.90it/s]

finished frames 8642400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▋ | 1440528/1666666 [2:15:15<18:10, 207.31it/s]

finished frames 8643000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▋ | 1440634/1666666 [2:15:16<18:08, 207.60it/s]

finished frames 8643600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▋ | 1440739/1666666 [2:15:16<18:08, 207.56it/s]

finished frames 8644200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▋ | 1440823/1666666 [2:15:17<18:13, 206.51it/s]

finished frames 8644800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▋ | 1440928/1666666 [2:15:17<18:12, 206.62it/s]

finished frames 8645400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▋ | 1441033/1666666 [2:15:18<18:34, 202.45it/s]

finished frames 8646000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▋ | 1441138/1666666 [2:15:18<18:16, 205.75it/s]

finished frames 8646600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▋ | 1441222/1666666 [2:15:19<18:13, 206.13it/s]

finished frames 8647200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▋ | 1441328/1666666 [2:15:19<18:00, 208.53it/s]

finished frames 8647800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▋ | 1441433/1666666 [2:15:20<19:41, 190.62it/s]

finished frames 8648400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▋ | 1441517/1666666 [2:15:20<18:34, 201.95it/s]

finished frames 8649000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 86%|████████▋ | 1441622/1666666 [2:15:21<18:38, 201.20it/s]

finished frames 8649600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1441728/1666666 [2:15:21<18:05, 207.24it/s]

finished frames 8650200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1441833/1666666 [2:15:22<18:06, 207.00it/s]

finished frames 8650800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1441938/1666666 [2:15:22<18:05, 207.11it/s]

finished frames 8651400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1442022/1666666 [2:15:23<18:26, 203.00it/s]

finished frames 8652000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1442127/1666666 [2:15:23<18:04, 206.96it/s]

finished frames 8652600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1442234/1666666 [2:15:24<17:58, 208.06it/s]

finished frames 8653200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1442340/1666666 [2:15:24<18:00, 207.62it/s]

finished frames 8653800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1442424/1666666 [2:15:25<18:06, 206.44it/s]

finished frames 8654400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1442529/1666666 [2:15:25<18:03, 206.91it/s]

finished frames 8655000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1442634/1666666 [2:15:26<18:11, 205.22it/s]

finished frames 8655600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1442739/1666666 [2:15:26<18:05, 206.20it/s]

finished frames 8656200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1442823/1666666 [2:15:26<18:06, 206.00it/s]

finished frames 8656800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1442928/1666666 [2:15:27<17:52, 208.70it/s]

finished frames 8657400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1443034/1666666 [2:15:28<18:18, 203.63it/s]

finished frames 8658000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1443142/1666666 [2:15:28<17:40, 210.68it/s]

finished frames 8658600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1443230/1666666 [2:15:28<17:35, 211.71it/s]

finished frames 8659200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1443340/1666666 [2:15:29<17:29, 212.82it/s]

finished frames 8659800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1443428/1666666 [2:15:29<17:29, 212.78it/s]

finished frames 8660400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1443538/1666666 [2:15:30<17:29, 212.57it/s]

finished frames 8661000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1443626/1666666 [2:15:30<17:30, 212.32it/s]

finished frames 8661600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1443736/1666666 [2:15:31<17:26, 212.95it/s]

finished frames 8662200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1443824/1666666 [2:15:31<18:26, 201.41it/s]

finished frames 8662800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1443934/1666666 [2:15:32<18:02, 205.73it/s]

finished frames 8663400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1444022/1666666 [2:15:32<18:10, 204.09it/s]

finished frames 8664000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1444130/1666666 [2:15:33<17:42, 209.49it/s]

finished frames 8664600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1444240/1666666 [2:15:33<17:33, 211.06it/s]

finished frames 8665200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1444328/1666666 [2:15:34<17:32, 211.17it/s]

finished frames 8665800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1444438/1666666 [2:15:34<17:31, 211.35it/s]

finished frames 8666400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1444526/1666666 [2:15:35<17:30, 211.45it/s]

finished frames 8667000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1444636/1666666 [2:15:35<17:30, 211.31it/s]

finished frames 8667600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1444724/1666666 [2:15:36<17:32, 210.95it/s]

finished frames 8668200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1444834/1666666 [2:15:36<17:23, 212.51it/s]

finished frames 8668800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1444922/1666666 [2:15:37<17:30, 211.00it/s]

finished frames 8669400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1445032/1666666 [2:15:37<17:45, 207.94it/s]

finished frames 8670000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1445140/1666666 [2:15:38<17:33, 210.35it/s]

finished frames 8670600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1445228/1666666 [2:15:38<17:31, 210.57it/s]

finished frames 8671200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1445338/1666666 [2:15:39<17:26, 211.45it/s]

finished frames 8671800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1445426/1666666 [2:15:39<17:25, 211.52it/s]

finished frames 8672400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1445536/1666666 [2:15:39<17:19, 212.67it/s]

finished frames 8673000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1445624/1666666 [2:15:40<17:19, 212.56it/s]

finished frames 8673600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1445734/1666666 [2:15:40<17:18, 212.75it/s]

finished frames 8674200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1445844/1666666 [2:15:41<17:17, 212.80it/s]

finished frames 8674800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1445932/1666666 [2:15:41<17:20, 212.15it/s]

finished frames 8675400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1446020/1666666 [2:15:42<17:52, 205.68it/s]

finished frames 8676000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1446129/1666666 [2:15:42<18:57, 193.92it/s]

finished frames 8676600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1446238/1666666 [2:15:43<17:32, 209.36it/s]

finished frames 8677200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1446326/1666666 [2:15:43<17:22, 211.40it/s]

finished frames 8677800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1446436/1666666 [2:15:44<17:15, 212.73it/s]

finished frames 8678400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1446524/1666666 [2:15:44<17:14, 212.88it/s]

finished frames 8679000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1446634/1666666 [2:15:45<17:15, 212.45it/s]

finished frames 8679600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1446722/1666666 [2:15:45<17:13, 212.76it/s]

finished frames 8680200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1446832/1666666 [2:15:46<17:10, 213.26it/s]

finished frames 8680800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1446942/1666666 [2:15:46<17:13, 212.63it/s]

finished frames 8681400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1447029/1666666 [2:15:47<17:48, 205.50it/s]

finished frames 8682000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1447137/1666666 [2:15:47<17:25, 210.05it/s]

finished frames 8682600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1447225/1666666 [2:15:47<17:20, 210.86it/s]

finished frames 8683200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1447335/1666666 [2:15:48<17:21, 210.50it/s]

finished frames 8683800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1447423/1666666 [2:15:48<17:21, 210.42it/s]

finished frames 8684400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1447531/1666666 [2:15:49<17:36, 207.46it/s]

finished frames 8685000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1447638/1666666 [2:15:49<17:25, 209.49it/s]

finished frames 8685600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1447726/1666666 [2:15:50<17:18, 210.84it/s]

finished frames 8686200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1447836/1666666 [2:15:50<17:11, 212.19it/s]

finished frames 8686800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1447924/1666666 [2:15:51<17:07, 212.98it/s]

finished frames 8687400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1448034/1666666 [2:15:51<17:29, 208.37it/s]

finished frames 8688000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1448122/1666666 [2:15:52<17:09, 212.29it/s]

finished frames 8688600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1448232/1666666 [2:15:52<17:01, 213.80it/s]

finished frames 8689200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1448342/1666666 [2:15:53<17:02, 213.43it/s]

finished frames 8689800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1448430/1666666 [2:15:53<17:12, 211.27it/s]

finished frames 8690400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1448539/1666666 [2:15:54<17:13, 211.16it/s]

finished frames 8691000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1448629/1666666 [2:15:54<17:03, 213.04it/s]

finished frames 8691600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1448739/1666666 [2:15:55<17:09, 211.73it/s]

finished frames 8692200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1448825/1666666 [2:15:55<17:19, 209.55it/s]

finished frames 8692800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1448933/1666666 [2:15:56<17:01, 213.12it/s]

finished frames 8693400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1449021/1666666 [2:15:56<18:03, 200.89it/s]

finished frames 8694000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1449131/1666666 [2:15:56<16:55, 214.12it/s]

finished frames 8694600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1449241/1666666 [2:15:57<16:44, 216.50it/s]

finished frames 8695200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1449329/1666666 [2:15:57<16:44, 216.45it/s]

finished frames 8695800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1449439/1666666 [2:15:58<16:42, 216.78it/s]

finished frames 8696400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1449527/1666666 [2:15:58<17:11, 210.44it/s]

finished frames 8697000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1449637/1666666 [2:15:59<17:06, 211.49it/s]

finished frames 8697600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1449725/1666666 [2:15:59<17:10, 210.53it/s]

finished frames 8698200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1449835/1666666 [2:16:00<16:55, 213.47it/s]

finished frames 8698800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1449923/1666666 [2:16:00<16:46, 215.40it/s]

finished frames 8699400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1450033/1666666 [2:16:01<17:03, 211.61it/s]

finished frames 8700000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1450143/1666666 [2:16:01<17:00, 212.18it/s]

finished frames 8700600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1450231/1666666 [2:16:02<16:57, 212.75it/s]

finished frames 8701200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1450341/1666666 [2:16:02<16:46, 214.84it/s]

finished frames 8701800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1450429/1666666 [2:16:03<16:53, 213.39it/s]

finished frames 8702400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1450538/1666666 [2:16:03<17:08, 210.16it/s]

finished frames 8703000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1450626/1666666 [2:16:03<16:57, 212.33it/s]

finished frames 8703600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1450736/1666666 [2:16:04<16:47, 214.38it/s]

finished frames 8704200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1450824/1666666 [2:16:04<16:55, 212.65it/s]

finished frames 8704800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1450934/1666666 [2:16:05<17:52, 201.07it/s]

finished frames 8705400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1451020/1666666 [2:16:05<20:47, 172.84it/s]

finished frames 8706000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1451130/1666666 [2:16:06<17:27, 205.79it/s]

finished frames 8706600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1451240/1666666 [2:16:06<16:54, 212.38it/s]

finished frames 8707200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1451328/1666666 [2:16:07<16:51, 212.94it/s]

finished frames 8707800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1451438/1666666 [2:16:07<16:45, 214.01it/s]

finished frames 8708400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1451526/1666666 [2:16:08<16:47, 213.47it/s]

finished frames 8709000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1451636/1666666 [2:16:08<16:46, 213.60it/s]

finished frames 8709600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1451724/1666666 [2:16:09<16:49, 212.83it/s]

finished frames 8710200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1451834/1666666 [2:16:09<16:48, 213.11it/s]

finished frames 8710800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1451944/1666666 [2:16:10<16:44, 213.85it/s]

finished frames 8711400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1452031/1666666 [2:16:10<17:19, 206.52it/s]

finished frames 8712000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1452139/1666666 [2:16:11<17:02, 209.74it/s]

finished frames 8712600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1452227/1666666 [2:16:11<16:58, 210.62it/s]

finished frames 8713200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1452337/1666666 [2:16:12<17:00, 210.12it/s]

finished frames 8713800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1452425/1666666 [2:16:12<17:01, 209.77it/s]

finished frames 8714400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1452535/1666666 [2:16:13<17:01, 209.61it/s]

finished frames 8715000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1452642/1666666 [2:16:13<17:02, 209.31it/s]

finished frames 8715600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1452726/1666666 [2:16:14<17:05, 208.59it/s]

finished frames 8716200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1452834/1666666 [2:16:14<16:59, 209.82it/s]

finished frames 8716800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1452942/1666666 [2:16:15<16:56, 210.34it/s]

finished frames 8717400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1453029/1666666 [2:16:15<17:23, 204.80it/s]

finished frames 8718000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1453136/1666666 [2:16:15<17:02, 208.78it/s]

finished frames 8718600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1453222/1666666 [2:16:16<17:00, 209.18it/s]

finished frames 8719200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1453332/1666666 [2:16:16<16:51, 210.85it/s]

finished frames 8719800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1453440/1666666 [2:16:17<17:13, 206.33it/s]

finished frames 8720400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1453524/1666666 [2:16:17<17:12, 206.51it/s]

finished frames 8721000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1453632/1666666 [2:16:18<16:57, 209.37it/s]

finished frames 8721600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1453742/1666666 [2:16:18<16:45, 211.86it/s]

finished frames 8722200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1453830/1666666 [2:16:19<16:42, 212.27it/s]

finished frames 8722800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1453940/1666666 [2:16:19<16:35, 213.62it/s]

finished frames 8723400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1454027/1666666 [2:16:20<17:22, 203.97it/s]

finished frames 8724000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1454137/1666666 [2:16:20<16:43, 211.69it/s]

finished frames 8724600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1454225/1666666 [2:16:21<16:42, 211.85it/s]

finished frames 8725200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1454335/1666666 [2:16:21<16:41, 212.06it/s]

finished frames 8725800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1454423/1666666 [2:16:22<16:40, 212.06it/s]

finished frames 8726400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1454533/1666666 [2:16:22<16:36, 212.90it/s]

finished frames 8727000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1454643/1666666 [2:16:23<16:35, 212.90it/s]

finished frames 8727600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1454731/1666666 [2:16:23<16:37, 212.38it/s]

finished frames 8728200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1454841/1666666 [2:16:24<16:34, 212.91it/s]

finished frames 8728800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1454929/1666666 [2:16:24<16:35, 212.61it/s]

finished frames 8729400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1455039/1666666 [2:16:24<17:00, 207.43it/s]

finished frames 8730000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1455127/1666666 [2:16:25<16:42, 211.05it/s]

finished frames 8730600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1455237/1666666 [2:16:25<16:35, 212.47it/s]

finished frames 8731200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1455325/1666666 [2:16:26<16:36, 212.09it/s]

finished frames 8731800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1455435/1666666 [2:16:26<16:34, 212.44it/s]

finished frames 8732400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1455523/1666666 [2:16:27<16:38, 211.41it/s]

finished frames 8733000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1455633/1666666 [2:16:27<16:28, 213.41it/s]

finished frames 8733600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1455743/1666666 [2:16:28<16:29, 213.09it/s]

finished frames 8734200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1455831/1666666 [2:16:28<17:11, 204.34it/s]

finished frames 8734800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1455941/1666666 [2:16:29<16:34, 211.98it/s]

finished frames 8735400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1456028/1666666 [2:16:29<16:58, 206.85it/s]

finished frames 8736000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1456133/1666666 [2:16:30<17:02, 205.88it/s]

finished frames 8736600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1456238/1666666 [2:16:30<17:02, 205.70it/s]

finished frames 8737200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1456324/1666666 [2:16:31<16:49, 208.38it/s]

finished frames 8737800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1456430/1666666 [2:16:31<16:45, 209.17it/s]

finished frames 8738400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1456536/1666666 [2:16:32<16:43, 209.31it/s]

finished frames 8739000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1456643/1666666 [2:16:32<16:40, 209.91it/s]

finished frames 8739600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1456727/1666666 [2:16:33<16:44, 208.91it/s]

finished frames 8740200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1456834/1666666 [2:16:33<16:44, 208.87it/s]

finished frames 8740800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1456940/1666666 [2:16:34<16:42, 209.17it/s]

finished frames 8741400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1457024/1666666 [2:16:34<17:06, 204.16it/s]

finished frames 8742000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1457130/1666666 [2:16:34<16:46, 208.22it/s]

finished frames 8742600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1457237/1666666 [2:16:35<16:40, 209.33it/s]

finished frames 8743200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1457321/1666666 [2:16:35<16:43, 208.53it/s]

finished frames 8743800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1457430/1666666 [2:16:36<16:35, 210.14it/s]

finished frames 8744400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1457537/1666666 [2:16:36<16:38, 209.35it/s]

finished frames 8745000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1457642/1666666 [2:16:37<16:40, 208.87it/s]

finished frames 8745600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1457726/1666666 [2:16:37<16:42, 208.50it/s]

finished frames 8746200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1457832/1666666 [2:16:38<16:40, 208.71it/s]

finished frames 8746800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1457937/1666666 [2:16:38<16:39, 208.87it/s]

finished frames 8747400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1458022/1666666 [2:16:39<18:06, 192.08it/s]

finished frames 8748000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1458128/1666666 [2:16:39<17:52, 194.48it/s]

finished frames 8748600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 87%|████████▋ | 1458233/1666666 [2:16:40<16:52, 205.83it/s]

finished frames 8749200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1458338/1666666 [2:16:40<16:50, 206.25it/s]

finished frames 8749800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1458423/1666666 [2:16:41<16:39, 208.43it/s]

finished frames 8750400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1458529/1666666 [2:16:41<16:41, 207.93it/s]

finished frames 8751000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1458634/1666666 [2:16:42<16:40, 208.01it/s]

finished frames 8751600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1458740/1666666 [2:16:42<16:37, 208.38it/s]

finished frames 8752200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1458824/1666666 [2:16:43<16:37, 208.42it/s]

finished frames 8752800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1458930/1666666 [2:16:43<16:36, 208.56it/s]

finished frames 8753400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1459035/1666666 [2:16:44<17:00, 203.39it/s]

finished frames 8754000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1459141/1666666 [2:16:44<16:37, 207.99it/s]

finished frames 8754600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1459225/1666666 [2:16:45<16:39, 207.48it/s]

finished frames 8755200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1459330/1666666 [2:16:45<16:36, 208.07it/s]

finished frames 8755800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1459435/1666666 [2:16:46<16:38, 207.57it/s]

finished frames 8756400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1459540/1666666 [2:16:46<16:36, 207.80it/s]

finished frames 8757000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1459624/1666666 [2:16:47<16:34, 208.13it/s]

finished frames 8757600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1459732/1666666 [2:16:47<16:27, 209.50it/s]

finished frames 8758200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1459838/1666666 [2:16:48<16:29, 209.12it/s]

finished frames 8758800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1459922/1666666 [2:16:48<16:30, 208.73it/s]

finished frames 8759400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1460031/1666666 [2:16:48<16:38, 206.94it/s]

finished frames 8760000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1460140/1666666 [2:16:49<16:24, 209.83it/s]

finished frames 8760600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1460227/1666666 [2:16:49<16:22, 210.21it/s]

finished frames 8761200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1460336/1666666 [2:16:50<17:50, 192.83it/s]

finished frames 8761800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1460443/1666666 [2:16:50<16:39, 206.42it/s]

finished frames 8762400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1460528/1666666 [2:16:51<16:27, 208.74it/s]

finished frames 8763000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1460638/1666666 [2:16:51<16:20, 210.09it/s]

finished frames 8763600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1460726/1666666 [2:16:52<16:21, 209.80it/s]

finished frames 8764200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1460833/1666666 [2:16:52<16:22, 209.39it/s]

finished frames 8764800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1460941/1666666 [2:16:53<16:19, 210.12it/s]

finished frames 8765400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1461028/1666666 [2:16:53<16:43, 204.92it/s]

finished frames 8766000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1461136/1666666 [2:16:54<16:25, 208.60it/s]

finished frames 8766600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1461242/1666666 [2:16:54<16:22, 209.10it/s]

finished frames 8767200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1461326/1666666 [2:16:55<16:23, 208.77it/s]

finished frames 8767800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1461431/1666666 [2:16:55<16:46, 203.91it/s]

finished frames 8768400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1461536/1666666 [2:16:56<16:48, 203.39it/s]

finished frames 8769000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1461641/1666666 [2:16:56<16:48, 203.37it/s]

finished frames 8769600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1461725/1666666 [2:16:57<16:53, 202.13it/s]

finished frames 8770200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1461830/1666666 [2:16:57<16:50, 202.75it/s]

finished frames 8770800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1461935/1666666 [2:16:58<16:45, 203.55it/s]

finished frames 8771400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1462040/1666666 [2:16:58<17:05, 199.46it/s]

finished frames 8772000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1462124/1666666 [2:16:59<16:52, 201.95it/s]

finished frames 8772600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1462229/1666666 [2:16:59<16:47, 202.96it/s]

finished frames 8773200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1462334/1666666 [2:17:00<16:44, 203.48it/s]

finished frames 8773800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1462439/1666666 [2:17:00<16:42, 203.76it/s]

finished frames 8774400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1462523/1666666 [2:17:01<16:28, 206.43it/s]

finished frames 8775000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1462629/1666666 [2:17:01<16:16, 209.00it/s]

finished frames 8775600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1462735/1666666 [2:17:02<16:38, 204.28it/s]

finished frames 8776200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1462841/1666666 [2:17:02<16:18, 208.26it/s]

finished frames 8776800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1462926/1666666 [2:17:03<16:15, 208.93it/s]

finished frames 8777400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1463034/1666666 [2:17:03<16:33, 204.88it/s]

finished frames 8778000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1463141/1666666 [2:17:04<16:14, 208.89it/s]

finished frames 8778600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1463225/1666666 [2:17:04<16:14, 208.78it/s]

finished frames 8779200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1463335/1666666 [2:17:05<16:06, 210.28it/s]

finished frames 8779800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1463423/1666666 [2:17:05<16:06, 210.24it/s]

finished frames 8780400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1463533/1666666 [2:17:05<16:00, 211.49it/s]

finished frames 8781000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1463643/1666666 [2:17:06<16:03, 210.64it/s]

finished frames 8781600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1463729/1666666 [2:17:06<16:08, 209.64it/s]

finished frames 8782200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1463836/1666666 [2:17:07<16:09, 209.26it/s]

finished frames 8782800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1463943/1666666 [2:17:07<16:05, 209.91it/s]

finished frames 8783400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1464027/1666666 [2:17:08<16:32, 204.27it/s]

finished frames 8784000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1464136/1666666 [2:17:08<15:57, 211.55it/s]

finished frames 8784600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1464224/1666666 [2:17:09<15:53, 212.30it/s]

finished frames 8785200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1464334/1666666 [2:17:09<15:56, 211.59it/s]

finished frames 8785800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1464422/1666666 [2:17:10<15:56, 211.39it/s]

finished frames 8786400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1464532/1666666 [2:17:10<15:50, 212.66it/s]

finished frames 8787000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1464642/1666666 [2:17:11<15:49, 212.73it/s]

finished frames 8787600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1464730/1666666 [2:17:11<15:52, 212.04it/s]

finished frames 8788200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1464840/1666666 [2:17:12<15:50, 212.34it/s]

finished frames 8788800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1464928/1666666 [2:17:12<15:50, 212.15it/s]

finished frames 8789400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1465016/1666666 [2:17:13<17:43, 189.52it/s]

finished frames 8790000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1465125/1666666 [2:17:13<17:35, 191.02it/s]

finished frames 8790600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1465235/1666666 [2:17:14<16:05, 208.56it/s]

finished frames 8791200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1465323/1666666 [2:17:14<15:55, 210.63it/s]

finished frames 8791800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1465433/1666666 [2:17:15<15:43, 213.22it/s]

finished frames 8792400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1465543/1666666 [2:17:15<15:46, 212.60it/s]

finished frames 8793000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1465631/1666666 [2:17:15<15:48, 211.86it/s]

finished frames 8793600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1465741/1666666 [2:17:16<15:45, 212.42it/s]

finished frames 8794200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1465829/1666666 [2:17:16<15:45, 212.44it/s]

finished frames 8794800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1465939/1666666 [2:17:17<15:40, 213.51it/s]

finished frames 8795400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1466027/1666666 [2:17:17<16:03, 208.31it/s]

finished frames 8796000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1466137/1666666 [2:17:18<15:47, 211.64it/s]

finished frames 8796600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1466225/1666666 [2:17:18<15:45, 211.94it/s]

finished frames 8797200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1466335/1666666 [2:17:19<15:46, 211.63it/s]

finished frames 8797800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1466423/1666666 [2:17:19<15:45, 211.72it/s]

finished frames 8798400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1466533/1666666 [2:17:20<15:42, 212.34it/s]

finished frames 8799000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1466643/1666666 [2:17:20<15:41, 212.40it/s]

finished frames 8799600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1466731/1666666 [2:17:21<15:41, 212.29it/s]

finished frames 8800200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1466841/1666666 [2:17:21<15:43, 211.89it/s]

finished frames 8800800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1466929/1666666 [2:17:22<15:42, 211.93it/s]

finished frames 8801400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1467039/1666666 [2:17:22<16:03, 207.28it/s]

finished frames 8802000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1467126/1666666 [2:17:23<15:48, 210.46it/s]

finished frames 8802600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1467236/1666666 [2:17:23<15:42, 211.63it/s]

finished frames 8803200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1467324/1666666 [2:17:23<15:40, 211.87it/s]

finished frames 8803800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1467434/1666666 [2:17:24<16:14, 204.45it/s]

finished frames 8804400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1467522/1666666 [2:17:24<17:10, 193.18it/s]

finished frames 8805000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1467631/1666666 [2:17:25<15:55, 208.40it/s]

finished frames 8805600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1467741/1666666 [2:17:25<15:39, 211.80it/s]

finished frames 8806200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1467829/1666666 [2:17:26<15:36, 212.28it/s]

finished frames 8806800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1467939/1666666 [2:17:26<15:34, 212.63it/s]

finished frames 8807400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1468026/1666666 [2:17:27<15:59, 206.93it/s]

finished frames 8808000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1468136/1666666 [2:17:27<15:41, 210.85it/s]

finished frames 8808600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1468224/1666666 [2:17:28<15:36, 211.92it/s]

finished frames 8809200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1468334/1666666 [2:17:28<15:28, 213.51it/s]

finished frames 8809800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1468422/1666666 [2:17:29<15:34, 212.09it/s]

finished frames 8810400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1468532/1666666 [2:17:29<15:28, 213.28it/s]

finished frames 8811000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1468642/1666666 [2:17:30<15:31, 212.61it/s]

finished frames 8811600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1468730/1666666 [2:17:30<15:31, 212.50it/s]

finished frames 8812200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1468840/1666666 [2:17:31<15:31, 212.49it/s]

finished frames 8812800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1468928/1666666 [2:17:31<15:32, 212.09it/s]

finished frames 8813400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1469038/1666666 [2:17:32<15:54, 207.09it/s]

finished frames 8814000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1469126/1666666 [2:17:32<15:39, 210.17it/s]

finished frames 8814600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1469236/1666666 [2:17:33<15:32, 211.75it/s]

finished frames 8815200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1469324/1666666 [2:17:33<15:32, 211.69it/s]

finished frames 8815800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1469434/1666666 [2:17:33<15:29, 212.24it/s]

finished frames 8816400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1469543/1666666 [2:17:34<15:44, 208.74it/s]

finished frames 8817000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1469627/1666666 [2:17:34<15:53, 206.73it/s]

finished frames 8817600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1469732/1666666 [2:17:35<15:49, 207.36it/s]

finished frames 8818200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1469837/1666666 [2:17:35<16:08, 203.18it/s]

finished frames 8818800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1469942/1666666 [2:17:36<16:20, 200.70it/s]

finished frames 8819400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1470026/1666666 [2:17:36<16:21, 200.32it/s]

finished frames 8820000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1470131/1666666 [2:17:37<16:01, 204.30it/s]

finished frames 8820600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1470236/1666666 [2:17:37<15:56, 205.37it/s]

finished frames 8821200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1470341/1666666 [2:17:38<15:52, 206.14it/s]

finished frames 8821800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1470425/1666666 [2:17:38<15:58, 204.64it/s]

finished frames 8822400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1470530/1666666 [2:17:39<15:55, 205.27it/s]

finished frames 8823000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1470635/1666666 [2:17:39<15:54, 205.48it/s]

finished frames 8823600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1470740/1666666 [2:17:40<15:55, 205.05it/s]

finished frames 8824200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1470824/1666666 [2:17:40<15:59, 204.17it/s]

finished frames 8824800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1470929/1666666 [2:17:41<15:55, 204.86it/s]

finished frames 8825400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1471034/1666666 [2:17:41<16:15, 200.48it/s]

finished frames 8826000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1471140/1666666 [2:17:42<15:45, 206.86it/s]

finished frames 8826600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1471224/1666666 [2:17:42<15:51, 205.30it/s]

finished frames 8827200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1471334/1666666 [2:17:43<15:28, 210.43it/s]

finished frames 8827800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1471444/1666666 [2:17:43<15:14, 213.53it/s]

finished frames 8828400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1471532/1666666 [2:17:44<15:24, 211.17it/s]

finished frames 8829000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1471642/1666666 [2:17:44<15:19, 212.19it/s]

finished frames 8829600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1471730/1666666 [2:17:45<15:19, 212.06it/s]

finished frames 8830200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1471840/1666666 [2:17:45<15:16, 212.68it/s]

finished frames 8830800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1471928/1666666 [2:17:46<15:20, 211.58it/s]

finished frames 8831400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1472038/1666666 [2:17:46<15:35, 208.08it/s]

finished frames 8832000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1472124/1666666 [2:17:47<16:02, 202.21it/s]

finished frames 8832600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1472231/1666666 [2:17:47<17:32, 184.68it/s]

finished frames 8833200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1472336/1666666 [2:17:48<15:58, 202.77it/s]

finished frames 8833800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1472441/1666666 [2:17:48<15:43, 205.90it/s]

finished frames 8834400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1472525/1666666 [2:17:49<15:43, 205.84it/s]

finished frames 8835000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1472630/1666666 [2:17:49<15:39, 206.58it/s]

finished frames 8835600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1472735/1666666 [2:17:50<15:39, 206.42it/s]

finished frames 8836200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1472840/1666666 [2:17:50<15:37, 206.65it/s]

finished frames 8836800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1472924/1666666 [2:17:50<15:37, 206.61it/s]

finished frames 8837400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1473029/1666666 [2:17:51<15:56, 202.49it/s]

finished frames 8838000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1473134/1666666 [2:17:51<15:43, 205.10it/s]

finished frames 8838600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1473239/1666666 [2:17:52<15:42, 205.21it/s]

finished frames 8839200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1473323/1666666 [2:17:52<15:41, 205.46it/s]

finished frames 8839800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1473428/1666666 [2:17:53<15:30, 207.57it/s]

finished frames 8840400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1473533/1666666 [2:17:53<16:07, 199.60it/s]

finished frames 8841000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1473643/1666666 [2:17:54<15:18, 210.11it/s]

finished frames 8841600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1473730/1666666 [2:17:54<15:14, 211.01it/s]

finished frames 8842200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1473838/1666666 [2:17:55<15:24, 208.55it/s]

finished frames 8842800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1473923/1666666 [2:17:55<15:20, 209.45it/s]

finished frames 8843400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1474033/1666666 [2:17:56<15:24, 208.41it/s]

finished frames 8844000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1474143/1666666 [2:17:56<15:01, 213.67it/s]

finished frames 8844600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1474231/1666666 [2:17:57<14:53, 215.41it/s]

finished frames 8845200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1474341/1666666 [2:17:57<14:53, 215.33it/s]

finished frames 8845800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1474429/1666666 [2:17:58<14:57, 214.21it/s]

finished frames 8846400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1474539/1666666 [2:17:58<14:46, 216.80it/s]

finished frames 8847000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1474627/1666666 [2:17:59<14:55, 214.49it/s]

finished frames 8847600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1474737/1666666 [2:17:59<15:08, 211.25it/s]

finished frames 8848200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1474825/1666666 [2:18:00<15:12, 210.25it/s]

finished frames 8848800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 88%|████████▊ | 1474935/1666666 [2:18:00<14:44, 216.74it/s]

finished frames 8849400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▊ | 1475023/1666666 [2:18:00<15:03, 212.12it/s]

finished frames 8850000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▊ | 1475133/1666666 [2:18:01<14:57, 213.48it/s]

finished frames 8850600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▊ | 1475243/1666666 [2:18:01<14:59, 212.82it/s]

finished frames 8851200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▊ | 1475331/1666666 [2:18:02<15:15, 208.96it/s]

finished frames 8851800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▊ | 1475440/1666666 [2:18:02<15:08, 210.47it/s]

finished frames 8852400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▊ | 1475528/1666666 [2:18:03<15:00, 212.22it/s]

finished frames 8853000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▊ | 1475638/1666666 [2:18:03<15:09, 209.96it/s]

finished frames 8853600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▊ | 1475726/1666666 [2:18:04<15:07, 210.47it/s]

finished frames 8854200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▊ | 1475836/1666666 [2:18:04<14:59, 212.20it/s]

finished frames 8854800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▊ | 1475924/1666666 [2:18:05<15:00, 211.88it/s]

finished frames 8855400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▊ | 1476034/1666666 [2:18:05<15:17, 207.69it/s]

finished frames 8856000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▊ | 1476122/1666666 [2:18:06<15:07, 210.02it/s]

finished frames 8856600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▊ | 1476231/1666666 [2:18:06<15:10, 209.11it/s]

finished frames 8857200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▊ | 1476341/1666666 [2:18:07<15:03, 210.63it/s]

finished frames 8857800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▊ | 1476426/1666666 [2:18:07<15:24, 205.76it/s]

finished frames 8858400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▊ | 1476531/1666666 [2:18:08<15:26, 205.26it/s]

finished frames 8859000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▊ | 1476636/1666666 [2:18:08<15:25, 205.38it/s]

finished frames 8859600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▊ | 1476741/1666666 [2:18:09<15:27, 204.72it/s]

finished frames 8860200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▊ | 1476825/1666666 [2:18:09<15:31, 203.78it/s]

finished frames 8860800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▊ | 1476931/1666666 [2:18:10<16:34, 190.76it/s]

finished frames 8861400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▊ | 1477038/1666666 [2:18:10<15:39, 201.83it/s]

finished frames 8862000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▊ | 1477124/1666666 [2:18:11<15:10, 208.28it/s]

finished frames 8862600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▊ | 1477234/1666666 [2:18:11<14:54, 211.88it/s]

finished frames 8863200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▊ | 1477322/1666666 [2:18:11<14:59, 210.55it/s]

finished frames 8863800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▊ | 1477432/1666666 [2:18:12<14:53, 211.89it/s]

finished frames 8864400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▊ | 1477541/1666666 [2:18:13<15:02, 209.63it/s]

finished frames 8865000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▊ | 1477625/1666666 [2:18:13<15:07, 208.22it/s]

finished frames 8865600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▊ | 1477733/1666666 [2:18:13<14:59, 210.00it/s]

finished frames 8866200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▊ | 1477843/1666666 [2:18:14<14:58, 210.16it/s]

finished frames 8866800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▊ | 1477931/1666666 [2:18:14<14:59, 209.71it/s]

finished frames 8867400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▊ | 1478036/1666666 [2:18:15<15:25, 203.84it/s]

finished frames 8868000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▊ | 1478123/1666666 [2:18:15<15:04, 208.41it/s]

finished frames 8868600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▊ | 1478233/1666666 [2:18:16<14:53, 210.91it/s]

finished frames 8869200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▊ | 1478343/1666666 [2:18:16<14:54, 210.60it/s]

finished frames 8869800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▊ | 1478431/1666666 [2:18:17<14:55, 210.25it/s]

finished frames 8870400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▊ | 1478541/1666666 [2:18:17<14:53, 210.53it/s]

finished frames 8871000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▊ | 1478629/1666666 [2:18:18<14:52, 210.70it/s]

finished frames 8871600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▊ | 1478739/1666666 [2:18:18<14:51, 210.90it/s]

finished frames 8872200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▊ | 1478826/1666666 [2:18:19<14:59, 208.90it/s]

finished frames 8872800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▊ | 1478933/1666666 [2:18:19<14:57, 209.28it/s]

finished frames 8873400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▊ | 1479038/1666666 [2:18:20<15:22, 203.36it/s]

finished frames 8874000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▊ | 1479124/1666666 [2:18:20<15:01, 207.92it/s]

finished frames 8874600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1479230/1666666 [2:18:21<15:15, 204.69it/s]

finished frames 8875200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1479336/1666666 [2:18:21<15:01, 207.78it/s]

finished frames 8875800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1479423/1666666 [2:18:22<14:50, 210.38it/s]

finished frames 8876400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1479533/1666666 [2:18:22<14:37, 213.31it/s]

finished frames 8877000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1479643/1666666 [2:18:23<14:38, 212.78it/s]

finished frames 8877600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1479731/1666666 [2:18:23<14:41, 212.09it/s]

finished frames 8878200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1479841/1666666 [2:18:24<14:39, 212.37it/s]

finished frames 8878800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1479929/1666666 [2:18:24<14:40, 212.06it/s]

finished frames 8879400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1480039/1666666 [2:18:24<15:00, 207.34it/s]

finished frames 8880000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1480125/1666666 [2:18:25<14:48, 209.85it/s]

finished frames 8880600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1480235/1666666 [2:18:25<14:40, 211.77it/s]

finished frames 8881200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1480323/1666666 [2:18:26<14:38, 212.17it/s]

finished frames 8881800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1480433/1666666 [2:18:26<14:38, 212.06it/s]

finished frames 8882400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1480543/1666666 [2:18:27<14:37, 212.14it/s]

finished frames 8883000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1480631/1666666 [2:18:27<14:35, 212.54it/s]

finished frames 8883600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1480741/1666666 [2:18:28<14:30, 213.67it/s]

finished frames 8884200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1480829/1666666 [2:18:28<14:32, 213.07it/s]

finished frames 8884800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1480939/1666666 [2:18:29<14:31, 213.16it/s]

finished frames 8885400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1481026/1666666 [2:18:29<15:14, 203.05it/s]

finished frames 8886000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1481131/1666666 [2:18:30<14:57, 206.66it/s]

finished frames 8886600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1481236/1666666 [2:18:30<14:53, 207.47it/s]

finished frames 8887200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1481341/1666666 [2:18:31<14:50, 208.03it/s]

finished frames 8887800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1481425/1666666 [2:18:31<14:55, 206.94it/s]

finished frames 8888400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1481530/1666666 [2:18:32<16:14, 190.02it/s]

finished frames 8889000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1481635/1666666 [2:18:32<15:04, 204.53it/s]

finished frames 8889600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1481740/1666666 [2:18:33<14:53, 206.95it/s]

finished frames 8890200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1481824/1666666 [2:18:33<14:51, 207.35it/s]

finished frames 8890800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1481930/1666666 [2:18:33<14:44, 208.83it/s]

finished frames 8891400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1482035/1666666 [2:18:34<15:06, 203.57it/s]

finished frames 8892000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1482140/1666666 [2:18:35<14:53, 206.63it/s]

finished frames 8892600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1482224/1666666 [2:18:35<14:50, 207.02it/s]

finished frames 8893200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1482330/1666666 [2:18:35<14:42, 208.94it/s]

finished frames 8893800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1482435/1666666 [2:18:36<14:46, 207.90it/s]

finished frames 8894400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1482540/1666666 [2:18:36<14:44, 208.18it/s]

finished frames 8895000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1482624/1666666 [2:18:37<14:46, 207.58it/s]

finished frames 8895600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1482729/1666666 [2:18:37<14:48, 206.92it/s]

finished frames 8896200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1482834/1666666 [2:18:38<14:46, 207.41it/s]

finished frames 8896800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1482940/1666666 [2:18:38<14:42, 208.23it/s]

finished frames 8897400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1483024/1666666 [2:18:39<15:07, 202.33it/s]

finished frames 8898000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1483129/1666666 [2:18:39<14:55, 204.93it/s]

finished frames 8898600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1483234/1666666 [2:18:40<14:52, 205.43it/s]

finished frames 8899200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1483339/1666666 [2:18:40<14:48, 206.24it/s]

finished frames 8899800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1483423/1666666 [2:18:41<14:51, 205.50it/s]

finished frames 8900400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1483528/1666666 [2:18:41<14:51, 205.51it/s]

finished frames 8901000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1483633/1666666 [2:18:42<14:51, 205.33it/s]

finished frames 8901600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1483738/1666666 [2:18:42<14:45, 206.53it/s]

finished frames 8902200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1483822/1666666 [2:18:43<14:45, 206.44it/s]

finished frames 8902800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1483927/1666666 [2:18:43<14:39, 207.85it/s]

finished frames 8903400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1484032/1666666 [2:18:44<15:04, 201.84it/s]

finished frames 8904000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1484137/1666666 [2:18:44<14:47, 205.72it/s]

finished frames 8904600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1484242/1666666 [2:18:45<14:43, 206.37it/s]

finished frames 8905200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1484326/1666666 [2:18:45<14:43, 206.38it/s]

finished frames 8905800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1484431/1666666 [2:18:46<14:43, 206.36it/s]

finished frames 8906400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1484536/1666666 [2:18:46<14:37, 207.62it/s]

finished frames 8907000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1484641/1666666 [2:18:47<14:39, 206.98it/s]

finished frames 8907600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1484725/1666666 [2:18:47<14:37, 207.31it/s]

finished frames 8908200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1484834/1666666 [2:18:48<14:16, 212.32it/s]

finished frames 8908800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1484944/1666666 [2:18:48<14:05, 214.92it/s]

finished frames 8909400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1485032/1666666 [2:18:48<14:27, 209.48it/s]

finished frames 8910000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1485142/1666666 [2:18:49<14:08, 213.96it/s]

finished frames 8910600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1485230/1666666 [2:18:49<14:06, 214.32it/s]

finished frames 8911200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1485340/1666666 [2:18:50<14:02, 215.17it/s]

finished frames 8911800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1485428/1666666 [2:18:50<14:05, 214.40it/s]

finished frames 8912400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1485538/1666666 [2:18:51<14:04, 214.41it/s]

finished frames 8913000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1485626/1666666 [2:18:51<14:08, 213.32it/s]

finished frames 8913600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1485736/1666666 [2:18:52<14:06, 213.68it/s]

finished frames 8914200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1485824/1666666 [2:18:52<14:06, 213.66it/s]

finished frames 8914800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1485934/1666666 [2:18:53<14:08, 212.99it/s]

finished frames 8915400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1486022/1666666 [2:18:53<14:36, 206.08it/s]

finished frames 8916000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1486132/1666666 [2:18:54<14:14, 211.22it/s]

finished frames 8916600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1486242/1666666 [2:18:54<14:43, 204.14it/s]

finished frames 8917200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1486330/1666666 [2:18:55<15:28, 194.32it/s]

finished frames 8917800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1486439/1666666 [2:18:55<14:16, 210.50it/s]

finished frames 8918400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1486527/1666666 [2:18:56<14:06, 212.88it/s]

finished frames 8919000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1486637/1666666 [2:18:56<14:01, 213.92it/s]

finished frames 8919600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1486725/1666666 [2:18:56<13:58, 214.51it/s]

finished frames 8920200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1486835/1666666 [2:18:57<13:56, 215.08it/s]

finished frames 8920800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1486923/1666666 [2:18:57<13:58, 214.25it/s]

finished frames 8921400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1487033/1666666 [2:18:58<14:11, 210.92it/s]

finished frames 8922000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1487143/1666666 [2:18:58<13:59, 213.91it/s]

finished frames 8922600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1487231/1666666 [2:18:59<13:58, 213.99it/s]

finished frames 8923200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1487341/1666666 [2:18:59<13:54, 214.87it/s]

finished frames 8923800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1487429/1666666 [2:19:00<13:58, 213.76it/s]

finished frames 8924400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1487539/1666666 [2:19:00<13:57, 213.99it/s]

finished frames 8925000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1487627/1666666 [2:19:01<13:56, 214.13it/s]

finished frames 8925600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1487737/1666666 [2:19:01<13:57, 213.75it/s]

finished frames 8926200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1487825/1666666 [2:19:02<13:47, 216.25it/s]

finished frames 8926800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1487935/1666666 [2:19:02<13:47, 215.97it/s]

finished frames 8927400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1488022/1666666 [2:19:03<14:13, 209.22it/s]

finished frames 8928000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1488132/1666666 [2:19:03<14:00, 212.39it/s]

finished frames 8928600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1488242/1666666 [2:19:04<13:56, 213.32it/s]

finished frames 8929200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1488330/1666666 [2:19:04<13:54, 213.59it/s]

finished frames 8929800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1488440/1666666 [2:19:04<13:51, 214.36it/s]

finished frames 8930400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1488528/1666666 [2:19:05<13:51, 214.18it/s]

finished frames 8931000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1488616/1666666 [2:19:05<15:03, 197.16it/s]

finished frames 8931600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1488726/1666666 [2:19:06<15:07, 196.08it/s]

finished frames 8932200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1488836/1666666 [2:19:06<14:04, 210.60it/s]

finished frames 8932800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1488924/1666666 [2:19:07<13:54, 213.06it/s]

finished frames 8933400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1489034/1666666 [2:19:07<14:03, 210.54it/s]

finished frames 8934000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1489144/1666666 [2:19:08<13:51, 213.47it/s]

finished frames 8934600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1489232/1666666 [2:19:08<13:49, 213.91it/s]

finished frames 8935200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1489342/1666666 [2:19:09<13:44, 214.97it/s]

finished frames 8935800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1489430/1666666 [2:19:09<13:48, 213.96it/s]

finished frames 8936400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1489540/1666666 [2:19:10<13:46, 214.37it/s]

finished frames 8937000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1489628/1666666 [2:19:10<13:47, 213.88it/s]

finished frames 8937600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1489738/1666666 [2:19:11<13:47, 213.81it/s]

finished frames 8938200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1489826/1666666 [2:19:11<13:47, 213.62it/s]

finished frames 8938800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1489936/1666666 [2:19:12<13:49, 212.99it/s]

finished frames 8939400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1490024/1666666 [2:19:12<14:05, 208.89it/s]

finished frames 8940000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1490134/1666666 [2:19:12<13:49, 212.85it/s]

finished frames 8940600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1490222/1666666 [2:19:13<13:49, 212.60it/s]

finished frames 8941200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1490332/1666666 [2:19:13<13:42, 214.48it/s]

finished frames 8941800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1490442/1666666 [2:19:14<13:42, 214.31it/s]

finished frames 8942400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1490530/1666666 [2:19:14<13:43, 213.84it/s]

finished frames 8943000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1490640/1666666 [2:19:15<13:42, 214.09it/s]

finished frames 8943600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1490728/1666666 [2:19:15<13:42, 213.93it/s]

finished frames 8944200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1490838/1666666 [2:19:16<13:41, 214.06it/s]

finished frames 8944800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1490926/1666666 [2:19:16<13:44, 213.22it/s]

finished frames 8945400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1491036/1666666 [2:19:17<14:46, 198.14it/s]

finished frames 8946000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1491123/1666666 [2:19:17<15:10, 192.88it/s]

finished frames 8946600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1491232/1666666 [2:19:18<13:56, 209.82it/s]

finished frames 8947200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1491342/1666666 [2:19:18<13:41, 213.54it/s]

finished frames 8947800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1491430/1666666 [2:19:19<13:40, 213.65it/s]

finished frames 8948400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1491540/1666666 [2:19:19<13:37, 214.25it/s]

finished frames 8949000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 89%|████████▉ | 1491628/1666666 [2:19:20<13:38, 213.91it/s]

finished frames 8949600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1491738/1666666 [2:19:20<13:31, 215.45it/s]

finished frames 8950200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1491826/1666666 [2:19:20<13:30, 215.69it/s]

finished frames 8950800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1491936/1666666 [2:19:21<13:35, 214.15it/s]

finished frames 8951400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1492023/1666666 [2:19:21<13:57, 208.41it/s]

finished frames 8952000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1492133/1666666 [2:19:22<13:40, 212.74it/s]

finished frames 8952600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1492243/1666666 [2:19:22<13:34, 214.24it/s]

finished frames 8953200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1492331/1666666 [2:19:23<13:37, 213.38it/s]

finished frames 8953800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1492441/1666666 [2:19:23<13:34, 213.99it/s]

finished frames 8954400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1492529/1666666 [2:19:24<13:35, 213.62it/s]

finished frames 8955000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1492639/1666666 [2:19:24<13:33, 213.86it/s]

finished frames 8955600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1492727/1666666 [2:19:25<13:35, 213.36it/s]

finished frames 8956200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1492837/1666666 [2:19:25<13:31, 214.10it/s]

finished frames 8956800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1492925/1666666 [2:19:26<13:34, 213.35it/s]

finished frames 8957400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1493035/1666666 [2:19:26<13:47, 209.93it/s]

finished frames 8958000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1493123/1666666 [2:19:27<13:35, 212.84it/s]

finished frames 8958600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1493233/1666666 [2:19:27<13:28, 214.64it/s]

finished frames 8959200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1493343/1666666 [2:19:28<13:29, 214.17it/s]

finished frames 8959800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1493431/1666666 [2:19:28<14:19, 201.52it/s]

finished frames 8960400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1493541/1666666 [2:19:29<13:28, 214.26it/s]

finished frames 8961000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1493629/1666666 [2:19:29<13:27, 214.36it/s]

finished frames 8961600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1493739/1666666 [2:19:29<13:28, 213.94it/s]

finished frames 8962200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1493827/1666666 [2:19:30<13:30, 213.34it/s]

finished frames 8962800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1493937/1666666 [2:19:30<13:28, 213.57it/s]

finished frames 8963400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1494025/1666666 [2:19:31<13:47, 208.60it/s]

finished frames 8964000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1494135/1666666 [2:19:31<13:32, 212.30it/s]

finished frames 8964600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1494223/1666666 [2:19:32<13:32, 212.21it/s]

finished frames 8965200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1494333/1666666 [2:19:32<13:24, 214.22it/s]

finished frames 8965800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1494443/1666666 [2:19:33<13:23, 214.40it/s]

finished frames 8966400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1494531/1666666 [2:19:33<13:26, 213.51it/s]

finished frames 8967000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1494641/1666666 [2:19:34<13:24, 213.76it/s]

finished frames 8967600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1494729/1666666 [2:19:34<13:27, 213.00it/s]

finished frames 8968200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1494839/1666666 [2:19:35<13:24, 213.66it/s]

finished frames 8968800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1494927/1666666 [2:19:35<13:24, 213.58it/s]

finished frames 8969400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1495037/1666666 [2:19:36<13:41, 208.99it/s]

finished frames 8970000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1495125/1666666 [2:19:36<13:27, 212.33it/s]

finished frames 8970600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1495235/1666666 [2:19:36<13:23, 213.41it/s]

finished frames 8971200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1495323/1666666 [2:19:37<13:24, 212.90it/s]

finished frames 8971800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1495433/1666666 [2:19:37<13:23, 213.06it/s]

finished frames 8972400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1495543/1666666 [2:19:38<13:16, 214.87it/s]

finished frames 8973000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1495631/1666666 [2:19:38<13:20, 213.63it/s]

finished frames 8973600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1495741/1666666 [2:19:39<13:17, 214.28it/s]

finished frames 8974200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1495829/1666666 [2:19:39<14:05, 202.03it/s]

finished frames 8974800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1495939/1666666 [2:19:40<13:25, 211.90it/s]

finished frames 8975400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1496027/1666666 [2:19:40<13:40, 207.99it/s]

finished frames 8976000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1496137/1666666 [2:19:41<13:22, 212.52it/s]

finished frames 8976600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1496225/1666666 [2:19:41<13:21, 212.70it/s]

finished frames 8977200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1496335/1666666 [2:19:42<13:18, 213.33it/s]

finished frames 8977800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1496423/1666666 [2:19:42<13:21, 212.43it/s]

finished frames 8978400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1496533/1666666 [2:19:43<13:13, 214.29it/s]

finished frames 8979000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1496643/1666666 [2:19:43<13:14, 213.88it/s]

finished frames 8979600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1496731/1666666 [2:19:44<13:16, 213.26it/s]

finished frames 8980200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1496841/1666666 [2:19:44<13:12, 214.37it/s]

finished frames 8980800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1496929/1666666 [2:19:44<13:11, 214.34it/s]

finished frames 8981400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1497039/1666666 [2:19:45<13:30, 209.39it/s]

finished frames 8982000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1497127/1666666 [2:19:45<13:17, 212.48it/s]

finished frames 8982600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1497237/1666666 [2:19:46<13:11, 213.94it/s]

finished frames 8983200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1497325/1666666 [2:19:46<13:13, 213.36it/s]

finished frames 8983800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1497434/1666666 [2:19:47<13:35, 207.46it/s]

finished frames 8984400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1497539/1666666 [2:19:47<13:46, 204.69it/s]

finished frames 8985000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1497623/1666666 [2:19:48<13:39, 206.23it/s]

finished frames 8985600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1497728/1666666 [2:19:48<13:43, 205.19it/s]

finished frames 8986200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1497833/1666666 [2:19:49<13:41, 205.57it/s]

finished frames 8986800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1497938/1666666 [2:19:49<13:38, 206.10it/s]

finished frames 8987400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1498022/1666666 [2:19:50<14:00, 200.76it/s]

finished frames 8988000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1498127/1666666 [2:19:50<13:52, 202.45it/s]

finished frames 8988600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1498233/1666666 [2:19:51<13:36, 206.23it/s]

finished frames 8989200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1498338/1666666 [2:19:51<14:18, 196.09it/s]

finished frames 8989800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1498422/1666666 [2:19:52<13:47, 203.34it/s]

finished frames 8990400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1498527/1666666 [2:19:52<13:29, 207.65it/s]

finished frames 8991000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1498632/1666666 [2:19:53<13:27, 208.02it/s]

finished frames 8991600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1498737/1666666 [2:19:53<13:42, 204.19it/s]

finished frames 8992200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1498824/1666666 [2:19:54<13:17, 210.42it/s]

finished frames 8992800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1498934/1666666 [2:19:54<13:17, 210.28it/s]

finished frames 8993400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1499022/1666666 [2:19:55<13:38, 204.79it/s]

finished frames 8994000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1499127/1666666 [2:19:55<13:31, 206.52it/s]

finished frames 8994600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1499235/1666666 [2:19:56<13:15, 210.51it/s]

finished frames 8995200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1499323/1666666 [2:19:56<13:02, 213.88it/s]

finished frames 8995800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1499433/1666666 [2:19:57<12:56, 215.25it/s]

finished frames 8996400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1499543/1666666 [2:19:57<12:58, 214.66it/s]

finished frames 8997000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1499631/1666666 [2:19:57<12:59, 214.28it/s]

finished frames 8997600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1499741/1666666 [2:19:58<13:03, 212.97it/s]

finished frames 8998200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1499828/1666666 [2:19:58<13:17, 209.22it/s]

finished frames 8998800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|████████▉ | 1499938/1666666 [2:19:59<13:12, 210.47it/s]

finished frames 8999400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1500023/1666666 [2:19:59<13:42, 202.67it/s]

finished frames 9000000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1500131/1666666 [2:20:00<13:13, 209.88it/s]

finished frames 9000600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1500241/1666666 [2:20:00<12:57, 214.05it/s]

finished frames 9001200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1500329/1666666 [2:20:01<12:54, 214.81it/s]

finished frames 9001800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1500439/1666666 [2:20:01<13:09, 210.55it/s]

finished frames 9002400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1500527/1666666 [2:20:02<13:09, 210.36it/s]

finished frames 9003000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1500635/1666666 [2:20:02<13:07, 210.88it/s]

finished frames 9003600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1500722/1666666 [2:20:03<13:31, 204.59it/s]

finished frames 9004200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1500829/1666666 [2:20:03<13:27, 205.31it/s]

finished frames 9004800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1500939/1666666 [2:20:04<13:03, 211.64it/s]

finished frames 9005400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1501027/1666666 [2:20:04<13:18, 207.51it/s]

finished frames 9006000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1501137/1666666 [2:20:05<13:03, 211.38it/s]

finished frames 9006600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1501225/1666666 [2:20:05<13:03, 211.12it/s]

finished frames 9007200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1501335/1666666 [2:20:06<12:57, 212.72it/s]

finished frames 9007800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1501423/1666666 [2:20:06<12:56, 212.79it/s]

finished frames 9008400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1501533/1666666 [2:20:06<12:50, 214.44it/s]

finished frames 9009000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1501643/1666666 [2:20:07<12:51, 214.01it/s]

finished frames 9009600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1501731/1666666 [2:20:07<12:53, 213.35it/s]

finished frames 9010200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1501841/1666666 [2:20:08<12:51, 213.71it/s]

finished frames 9010800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1501929/1666666 [2:20:08<12:51, 213.58it/s]

finished frames 9011400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1502039/1666666 [2:20:09<13:05, 209.55it/s]

finished frames 9012000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1502127/1666666 [2:20:09<12:57, 211.60it/s]

finished frames 9012600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1502237/1666666 [2:20:10<12:50, 213.33it/s]

finished frames 9013200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1502325/1666666 [2:20:10<12:47, 214.15it/s]

finished frames 9013800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1502435/1666666 [2:20:11<12:43, 214.99it/s]

finished frames 9014400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1502523/1666666 [2:20:11<12:48, 213.67it/s]

finished frames 9015000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1502633/1666666 [2:20:12<12:45, 214.32it/s]

finished frames 9015600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1502743/1666666 [2:20:12<12:45, 214.06it/s]

finished frames 9016200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1502831/1666666 [2:20:13<12:47, 213.33it/s]

finished frames 9016800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1502919/1666666 [2:20:13<12:49, 212.72it/s]

finished frames 9017400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1503027/1666666 [2:20:14<13:26, 202.83it/s]

finished frames 9018000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1503136/1666666 [2:20:14<12:59, 209.69it/s]

finished frames 9018600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1503224/1666666 [2:20:14<12:57, 210.27it/s]

finished frames 9019200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1503334/1666666 [2:20:15<12:43, 214.05it/s]

finished frames 9019800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1503422/1666666 [2:20:15<12:45, 213.12it/s]

finished frames 9020400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1503532/1666666 [2:20:16<12:50, 211.63it/s]

finished frames 9021000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1503642/1666666 [2:20:16<12:50, 211.63it/s]

finished frames 9021600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1503730/1666666 [2:20:17<12:53, 210.67it/s]

finished frames 9022200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1503840/1666666 [2:20:17<12:45, 212.57it/s]

finished frames 9022800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1503928/1666666 [2:20:18<12:45, 212.49it/s]

finished frames 9023400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1504038/1666666 [2:20:18<13:02, 207.88it/s]

finished frames 9024000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1504126/1666666 [2:20:19<12:52, 210.38it/s]

finished frames 9024600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1504236/1666666 [2:20:19<12:44, 212.44it/s]

finished frames 9025200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1504324/1666666 [2:20:20<12:45, 212.02it/s]

finished frames 9025800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1504434/1666666 [2:20:20<12:47, 211.37it/s]

finished frames 9026400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1504522/1666666 [2:20:21<12:48, 211.02it/s]

finished frames 9027000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1504632/1666666 [2:20:21<12:41, 212.81it/s]

finished frames 9027600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1504742/1666666 [2:20:22<12:46, 211.32it/s]

finished frames 9028200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1504830/1666666 [2:20:22<12:48, 210.71it/s]

finished frames 9028800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1504937/1666666 [2:20:23<12:57, 208.09it/s]

finished frames 9029400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1505021/1666666 [2:20:23<13:22, 201.43it/s]

finished frames 9030000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1505127/1666666 [2:20:23<13:01, 206.67it/s]

finished frames 9030600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1505232/1666666 [2:20:24<12:57, 207.51it/s]

finished frames 9031200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1505337/1666666 [2:20:25<13:30, 199.10it/s]

finished frames 9031800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1505421/1666666 [2:20:25<14:20, 187.45it/s]

finished frames 9032400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1505526/1666666 [2:20:25<13:10, 203.95it/s]

finished frames 9033000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1505631/1666666 [2:20:26<13:00, 206.42it/s]

finished frames 9033600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1505736/1666666 [2:20:26<12:58, 206.80it/s]

finished frames 9034200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1505841/1666666 [2:20:27<12:57, 206.98it/s]

finished frames 9034800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1505925/1666666 [2:20:27<13:00, 205.87it/s]

finished frames 9035400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1506030/1666666 [2:20:28<13:13, 202.34it/s]

finished frames 9036000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1506135/1666666 [2:20:28<12:59, 205.88it/s]

finished frames 9036600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1506240/1666666 [2:20:29<12:55, 206.79it/s]

finished frames 9037200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1506324/1666666 [2:20:29<12:56, 206.56it/s]

finished frames 9037800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1506429/1666666 [2:20:30<12:55, 206.72it/s]

finished frames 9038400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1506535/1666666 [2:20:30<12:52, 207.37it/s]

finished frames 9039000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1506640/1666666 [2:20:31<12:53, 206.85it/s]

finished frames 9039600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1506724/1666666 [2:20:31<12:57, 205.81it/s]

finished frames 9040200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1506829/1666666 [2:20:32<12:54, 206.49it/s]

finished frames 9040800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1506934/1666666 [2:20:32<12:51, 207.10it/s]

finished frames 9041400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1507039/1666666 [2:20:33<13:11, 201.78it/s]

finished frames 9042000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1507123/1666666 [2:20:33<12:55, 205.63it/s]

finished frames 9042600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1507230/1666666 [2:20:34<12:44, 208.67it/s]

finished frames 9043200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1507335/1666666 [2:20:34<12:49, 207.08it/s]

finished frames 9043800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1507443/1666666 [2:20:35<12:43, 208.51it/s]

finished frames 9044400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1507528/1666666 [2:20:35<12:46, 207.51it/s]

finished frames 9045000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1507633/1666666 [2:20:36<13:35, 194.96it/s]

finished frames 9045600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1507738/1666666 [2:20:36<12:58, 204.13it/s]

finished frames 9046200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1507822/1666666 [2:20:37<12:48, 206.64it/s]

finished frames 9046800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1507928/1666666 [2:20:37<12:39, 209.04it/s]

finished frames 9047400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1508033/1666666 [2:20:38<13:04, 202.25it/s]

finished frames 9048000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1508139/1666666 [2:20:38<12:42, 207.83it/s]

finished frames 9048600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 90%|█████████ | 1508226/1666666 [2:20:39<12:35, 209.61it/s]

finished frames 9049200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1508336/1666666 [2:20:39<12:32, 210.37it/s]

finished frames 9049800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1508424/1666666 [2:20:39<12:26, 211.88it/s]

finished frames 9050400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1508534/1666666 [2:20:40<12:24, 212.36it/s]

finished frames 9051000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1508622/1666666 [2:20:40<12:25, 212.01it/s]

finished frames 9051600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1508732/1666666 [2:20:41<12:18, 213.74it/s]

finished frames 9052200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1508842/1666666 [2:20:41<12:21, 212.87it/s]

finished frames 9052800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1508930/1666666 [2:20:42<12:24, 211.96it/s]

finished frames 9053400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1509040/1666666 [2:20:42<12:40, 207.26it/s]

finished frames 9054000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1509127/1666666 [2:20:43<12:32, 209.44it/s]

finished frames 9054600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1509237/1666666 [2:20:43<12:25, 211.23it/s]

finished frames 9055200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1509325/1666666 [2:20:44<12:24, 211.42it/s]

finished frames 9055800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1509435/1666666 [2:20:44<12:22, 211.67it/s]

finished frames 9056400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1509522/1666666 [2:20:45<12:37, 207.50it/s]

finished frames 9057000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1509627/1666666 [2:20:45<12:44, 205.37it/s]

finished frames 9057600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1509732/1666666 [2:20:46<12:43, 205.51it/s]

finished frames 9058200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1509837/1666666 [2:20:46<12:42, 205.68it/s]

finished frames 9058800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1509942/1666666 [2:20:47<12:40, 206.21it/s]

finished frames 9059400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1510026/1666666 [2:20:47<13:42, 190.46it/s]

finished frames 9060000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1510134/1666666 [2:20:48<12:54, 202.04it/s]

finished frames 9060600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1510222/1666666 [2:20:48<12:28, 209.11it/s]

finished frames 9061200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1510332/1666666 [2:20:49<12:15, 212.42it/s]

finished frames 9061800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1510442/1666666 [2:20:49<12:16, 212.23it/s]

finished frames 9062400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1510530/1666666 [2:20:50<12:15, 212.17it/s]

finished frames 9063000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1510640/1666666 [2:20:50<12:14, 212.43it/s]

finished frames 9063600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1510728/1666666 [2:20:51<12:15, 212.03it/s]

finished frames 9064200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1510838/1666666 [2:20:51<12:13, 212.30it/s]

finished frames 9064800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1510926/1666666 [2:20:51<12:13, 212.40it/s]

finished frames 9065400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1511035/1666666 [2:20:52<12:44, 203.63it/s]

finished frames 9066000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1511140/1666666 [2:20:53<12:33, 206.38it/s]

finished frames 9066600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1511224/1666666 [2:20:53<12:32, 206.48it/s]

finished frames 9067200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1511330/1666666 [2:20:53<12:22, 209.12it/s]

finished frames 9067800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1511440/1666666 [2:20:54<12:12, 211.99it/s]

finished frames 9068400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1511528/1666666 [2:20:54<12:13, 211.43it/s]

finished frames 9069000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1511638/1666666 [2:20:55<12:14, 211.02it/s]

finished frames 9069600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1511726/1666666 [2:20:55<12:10, 212.00it/s]

finished frames 9070200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1511836/1666666 [2:20:56<12:10, 211.93it/s]

finished frames 9070800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1511924/1666666 [2:20:56<12:08, 212.48it/s]

finished frames 9071400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1512034/1666666 [2:20:57<12:25, 207.36it/s]

finished frames 9072000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1512122/1666666 [2:20:57<12:12, 211.11it/s]

finished frames 9072600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1512232/1666666 [2:20:58<12:01, 213.92it/s]

finished frames 9073200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1512320/1666666 [2:20:58<13:02, 197.21it/s]

finished frames 9073800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1512429/1666666 [2:20:59<12:16, 209.41it/s]

finished frames 9074400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1512539/1666666 [2:20:59<12:20, 208.07it/s]

finished frames 9075000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1512627/1666666 [2:21:00<12:08, 211.31it/s]

finished frames 9075600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1512737/1666666 [2:21:00<12:02, 212.98it/s]

finished frames 9076200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1512825/1666666 [2:21:01<12:00, 213.62it/s]

finished frames 9076800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1512935/1666666 [2:21:01<11:58, 213.87it/s]

finished frames 9077400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1513022/1666666 [2:21:01<12:36, 203.04it/s]

finished frames 9078000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1513132/1666666 [2:21:02<12:03, 212.30it/s]

finished frames 9078600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1513242/1666666 [2:21:02<11:59, 213.17it/s]

finished frames 9079200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1513330/1666666 [2:21:03<11:58, 213.27it/s]

finished frames 9079800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1513440/1666666 [2:21:03<11:58, 213.28it/s]

finished frames 9080400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1513528/1666666 [2:21:04<12:01, 212.28it/s]

finished frames 9081000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1513638/1666666 [2:21:04<11:57, 213.17it/s]

finished frames 9081600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1513726/1666666 [2:21:05<11:58, 212.78it/s]

finished frames 9082200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1513836/1666666 [2:21:05<11:51, 214.89it/s]

finished frames 9082800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1513924/1666666 [2:21:06<11:57, 213.00it/s]

finished frames 9083400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1514034/1666666 [2:21:06<12:11, 208.70it/s]

finished frames 9084000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1514144/1666666 [2:21:07<11:53, 213.75it/s]

finished frames 9084600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1514232/1666666 [2:21:07<11:52, 213.87it/s]

finished frames 9085200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1514342/1666666 [2:21:08<11:48, 215.01it/s]

finished frames 9085800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1514430/1666666 [2:21:08<11:54, 213.17it/s]

finished frames 9086400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1514540/1666666 [2:21:09<11:54, 213.01it/s]

finished frames 9087000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1514628/1666666 [2:21:09<11:56, 212.33it/s]

finished frames 9087600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1514716/1666666 [2:21:09<12:56, 195.76it/s]

finished frames 9088200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1514825/1666666 [2:21:10<12:09, 208.21it/s]

finished frames 9088800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1514930/1666666 [2:21:10<12:14, 206.62it/s]

finished frames 9089400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1515036/1666666 [2:21:11<12:27, 202.82it/s]

finished frames 9090000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1515141/1666666 [2:21:12<12:18, 205.18it/s]

finished frames 9090600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1515225/1666666 [2:21:12<12:19, 204.84it/s]

finished frames 9091200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1515330/1666666 [2:21:12<12:12, 206.49it/s]

finished frames 9091800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1515435/1666666 [2:21:13<12:13, 206.20it/s]

finished frames 9092400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1515540/1666666 [2:21:13<12:15, 205.56it/s]

finished frames 9093000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1515624/1666666 [2:21:14<12:16, 205.10it/s]

finished frames 9093600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1515729/1666666 [2:21:14<12:12, 206.14it/s]

finished frames 9094200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1515834/1666666 [2:21:15<12:11, 206.31it/s]

finished frames 9094800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1515939/1666666 [2:21:15<12:10, 206.45it/s]

finished frames 9095400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1516023/1666666 [2:21:16<12:30, 200.75it/s]

finished frames 9096000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1516128/1666666 [2:21:16<12:15, 204.71it/s]

finished frames 9096600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1516233/1666666 [2:21:17<12:12, 205.32it/s]

finished frames 9097200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1516338/1666666 [2:21:17<12:05, 207.19it/s]

finished frames 9097800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1516422/1666666 [2:21:18<12:07, 206.56it/s]

finished frames 9098400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1516530/1666666 [2:21:18<11:57, 209.21it/s]

finished frames 9099000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1516635/1666666 [2:21:19<12:02, 207.63it/s]

finished frames 9099600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1516741/1666666 [2:21:19<12:00, 208.09it/s]

finished frames 9100200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1516825/1666666 [2:21:20<11:59, 208.17it/s]

finished frames 9100800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1516930/1666666 [2:21:20<12:01, 207.60it/s]

finished frames 9101400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1517014/1666666 [2:21:21<12:26, 200.50it/s]

finished frames 9102000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1517140/1666666 [2:21:21<12:15, 203.20it/s]

finished frames 9102600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1517224/1666666 [2:21:22<12:06, 205.81it/s]

finished frames 9103200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1517329/1666666 [2:21:22<12:02, 206.82it/s]

finished frames 9103800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1517434/1666666 [2:21:23<11:56, 208.22it/s]

finished frames 9104400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1517539/1666666 [2:21:23<11:58, 207.51it/s]

finished frames 9105000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1517623/1666666 [2:21:24<12:01, 206.54it/s]

finished frames 9105600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1517729/1666666 [2:21:24<11:54, 208.51it/s]

finished frames 9106200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1517834/1666666 [2:21:25<11:56, 207.71it/s]

finished frames 9106800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1517939/1666666 [2:21:25<11:59, 206.70it/s]

finished frames 9107400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1518023/1666666 [2:21:25<12:15, 202.12it/s]

finished frames 9108000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1518128/1666666 [2:21:26<12:01, 205.90it/s]

finished frames 9108600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1518233/1666666 [2:21:27<11:57, 206.85it/s]

finished frames 9109200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1518338/1666666 [2:21:27<11:55, 207.19it/s]

finished frames 9109800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1518422/1666666 [2:21:27<11:57, 206.67it/s]

finished frames 9110400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1518528/1666666 [2:21:28<11:48, 209.00it/s]

finished frames 9111000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1518633/1666666 [2:21:28<11:53, 207.34it/s]

finished frames 9111600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1518738/1666666 [2:21:29<11:52, 207.54it/s]

finished frames 9112200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1518822/1666666 [2:21:29<11:55, 206.63it/s]

finished frames 9112800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1518927/1666666 [2:21:30<11:55, 206.54it/s]

finished frames 9113400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1519032/1666666 [2:21:30<12:11, 201.76it/s]

finished frames 9114000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1519137/1666666 [2:21:31<11:54, 206.61it/s]

finished frames 9114600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1519222/1666666 [2:21:31<11:49, 207.73it/s]

finished frames 9115200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1519329/1666666 [2:21:32<11:45, 208.90it/s]

finished frames 9115800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1519434/1666666 [2:21:32<11:50, 207.28it/s]

finished frames 9116400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1519540/1666666 [2:21:33<11:47, 207.95it/s]

finished frames 9117000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1519624/1666666 [2:21:33<11:47, 207.97it/s]

finished frames 9117600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1519729/1666666 [2:21:34<11:47, 207.56it/s]

finished frames 9118200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1519834/1666666 [2:21:34<11:47, 207.44it/s]

finished frames 9118800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1519940/1666666 [2:21:35<11:42, 208.99it/s]

finished frames 9119400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1520027/1666666 [2:21:35<11:56, 204.69it/s]

finished frames 9120000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1520133/1666666 [2:21:36<11:44, 208.01it/s]

finished frames 9120600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1520239/1666666 [2:21:36<11:42, 208.44it/s]

finished frames 9121200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1520323/1666666 [2:21:37<11:40, 208.95it/s]

finished frames 9121800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1520428/1666666 [2:21:37<11:45, 207.27it/s]

finished frames 9122400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1520533/1666666 [2:21:38<11:43, 207.62it/s]

finished frames 9123000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1520638/1666666 [2:21:38<11:42, 207.85it/s]

finished frames 9123600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1520726/1666666 [2:21:39<11:30, 211.33it/s]

finished frames 9124200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████ | 1520832/1666666 [2:21:39<11:38, 208.88it/s]

finished frames 9124800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████▏| 1520940/1666666 [2:21:40<11:34, 209.85it/s]

finished frames 9125400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████▏| 1521026/1666666 [2:21:40<11:55, 203.50it/s]

finished frames 9126000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████▏| 1521135/1666666 [2:21:40<11:24, 212.67it/s]

finished frames 9126600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████▏| 1521223/1666666 [2:21:41<11:26, 211.96it/s]

finished frames 9127200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████▏| 1521333/1666666 [2:21:41<11:29, 210.76it/s]

finished frames 9127800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████▏| 1521443/1666666 [2:21:42<11:29, 210.62it/s]

finished frames 9128400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████▏| 1521531/1666666 [2:21:42<11:31, 209.77it/s]

finished frames 9129000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████▏| 1521640/1666666 [2:21:43<11:29, 210.40it/s]

finished frames 9129600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████▏| 1521726/1666666 [2:21:43<13:36, 177.51it/s]

finished frames 9130200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████▏| 1521832/1666666 [2:21:44<12:52, 187.61it/s]

finished frames 9130800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████▏| 1521937/1666666 [2:21:44<11:47, 204.44it/s]

finished frames 9131400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████▏| 1522022/1666666 [2:21:45<11:53, 202.79it/s]

finished frames 9132000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████▏| 1522127/1666666 [2:21:45<11:36, 207.63it/s]

finished frames 9132600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████▏| 1522233/1666666 [2:21:46<11:31, 208.92it/s]

finished frames 9133200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████▏| 1522339/1666666 [2:21:46<11:39, 206.29it/s]

finished frames 9133800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████▏| 1522423/1666666 [2:21:47<11:44, 204.65it/s]

finished frames 9134400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████▏| 1522528/1666666 [2:21:47<11:37, 206.59it/s]

finished frames 9135000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████▏| 1522636/1666666 [2:21:48<11:27, 209.45it/s]

finished frames 9135600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████▏| 1522724/1666666 [2:21:48<11:20, 211.40it/s]

finished frames 9136200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████▏| 1522834/1666666 [2:21:49<11:15, 212.81it/s]

finished frames 9136800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████▏| 1522922/1666666 [2:21:49<11:18, 211.95it/s]

finished frames 9137400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████▏| 1523032/1666666 [2:21:50<11:24, 209.80it/s]

finished frames 9138000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████▏| 1523142/1666666 [2:21:50<11:15, 212.36it/s]

finished frames 9138600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████▏| 1523230/1666666 [2:21:51<11:17, 211.63it/s]

finished frames 9139200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████▏| 1523340/1666666 [2:21:51<11:12, 213.07it/s]

finished frames 9139800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████▏| 1523428/1666666 [2:21:51<11:20, 210.34it/s]

finished frames 9140400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████▏| 1523538/1666666 [2:21:52<11:20, 210.48it/s]

finished frames 9141000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████▏| 1523624/1666666 [2:21:52<11:24, 208.95it/s]

finished frames 9141600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████▏| 1523734/1666666 [2:21:53<11:16, 211.19it/s]

finished frames 9142200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████▏| 1523822/1666666 [2:21:53<11:32, 206.42it/s]

finished frames 9142800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████▏| 1523931/1666666 [2:21:54<11:16, 211.03it/s]

finished frames 9143400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████▏| 1524038/1666666 [2:21:54<11:34, 205.33it/s]

finished frames 9144000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████▏| 1524123/1666666 [2:21:55<11:36, 204.66it/s]

finished frames 9144600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████▏| 1524228/1666666 [2:21:55<12:03, 196.91it/s]

finished frames 9145200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████▏| 1524337/1666666 [2:21:56<11:21, 208.79it/s]

finished frames 9145800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████▏| 1524425/1666666 [2:21:56<11:12, 211.41it/s]

finished frames 9146400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████▏| 1524535/1666666 [2:21:57<11:10, 211.98it/s]

finished frames 9147000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████▏| 1524623/1666666 [2:21:57<11:10, 211.86it/s]

finished frames 9147600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████▏| 1524733/1666666 [2:21:58<11:07, 212.59it/s]

finished frames 9148200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████▏| 1524843/1666666 [2:21:58<11:01, 214.38it/s]

finished frames 9148800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 91%|█████████▏| 1524931/1666666 [2:21:59<11:03, 213.77it/s]

finished frames 9149400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1525021/1666666 [2:21:59<11:16, 209.31it/s]

finished frames 9150000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1525130/1666666 [2:22:00<11:14, 209.72it/s]

finished frames 9150600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1525241/1666666 [2:22:00<10:51, 217.00it/s]

finished frames 9151200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1525329/1666666 [2:22:01<10:50, 217.40it/s]

finished frames 9151800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1525439/1666666 [2:22:01<10:46, 218.48it/s]

finished frames 9152400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1525527/1666666 [2:22:01<11:01, 213.36it/s]

finished frames 9153000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1525637/1666666 [2:22:02<10:56, 214.95it/s]

finished frames 9153600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1525725/1666666 [2:22:02<11:04, 212.09it/s]

finished frames 9154200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1525835/1666666 [2:22:03<11:04, 211.98it/s]

finished frames 9154800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1525923/1666666 [2:22:03<11:02, 212.35it/s]

finished frames 9155400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1526032/1666666 [2:22:04<11:20, 206.60it/s]

finished frames 9156000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1526140/1666666 [2:22:04<11:06, 210.69it/s]

finished frames 9156600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1526228/1666666 [2:22:05<11:03, 211.51it/s]

finished frames 9157200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1526338/1666666 [2:22:05<11:03, 211.43it/s]

finished frames 9157800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1526426/1666666 [2:22:06<11:04, 211.11it/s]

finished frames 9158400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1526536/1666666 [2:22:06<11:16, 207.24it/s]

finished frames 9159000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1526624/1666666 [2:22:07<11:02, 211.50it/s]

finished frames 9159600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1526734/1666666 [2:22:07<10:57, 212.77it/s]

finished frames 9160200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1526822/1666666 [2:22:08<10:56, 212.99it/s]

finished frames 9160800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1526932/1666666 [2:22:08<10:51, 214.41it/s]

finished frames 9161400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1527020/1666666 [2:22:09<11:17, 206.12it/s]

finished frames 9162000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1527130/1666666 [2:22:09<11:03, 210.27it/s]

finished frames 9162600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1527238/1666666 [2:22:10<11:11, 207.74it/s]

finished frames 9163200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1527322/1666666 [2:22:10<11:14, 206.49it/s]

finished frames 9163800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1527427/1666666 [2:22:10<11:13, 206.79it/s]

finished frames 9164400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1527532/1666666 [2:22:11<11:16, 205.80it/s]

finished frames 9165000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1527637/1666666 [2:22:12<11:12, 206.75it/s]

finished frames 9165600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1527721/1666666 [2:22:12<11:15, 205.56it/s]

finished frames 9166200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1527827/1666666 [2:22:12<11:08, 207.74it/s]

finished frames 9166800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1527932/1666666 [2:22:13<11:12, 206.44it/s]

finished frames 9167400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1528037/1666666 [2:22:13<11:31, 200.51it/s]

finished frames 9168000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1528142/1666666 [2:22:14<11:15, 205.12it/s]

finished frames 9168600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1528226/1666666 [2:22:14<11:15, 204.89it/s]

finished frames 9169200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1528331/1666666 [2:22:15<11:13, 205.37it/s]

finished frames 9169800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1528436/1666666 [2:22:15<11:10, 206.03it/s]

finished frames 9170400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1528541/1666666 [2:22:16<11:12, 205.52it/s]

finished frames 9171000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1528625/1666666 [2:22:16<11:11, 205.57it/s]

finished frames 9171600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1528730/1666666 [2:22:17<11:09, 206.11it/s]

finished frames 9172200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1528835/1666666 [2:22:17<11:35, 198.09it/s]

finished frames 9172800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1528940/1666666 [2:22:18<11:42, 196.13it/s]

finished frames 9173400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1529023/1666666 [2:22:18<11:40, 196.56it/s]

finished frames 9174000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1529128/1666666 [2:22:19<11:12, 204.55it/s]

finished frames 9174600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1529233/1666666 [2:22:19<11:07, 206.01it/s]

finished frames 9175200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1529338/1666666 [2:22:20<11:11, 204.48it/s]

finished frames 9175800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1529422/1666666 [2:22:20<11:29, 199.14it/s]

finished frames 9176400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1529523/1666666 [2:22:21<11:31, 198.19it/s]

finished frames 9177000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1529627/1666666 [2:22:21<11:07, 205.23it/s]

finished frames 9177600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1529732/1666666 [2:22:22<11:03, 206.31it/s]

finished frames 9178200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1529837/1666666 [2:22:22<11:02, 206.43it/s]

finished frames 9178800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1529921/1666666 [2:22:23<11:05, 205.62it/s]

finished frames 9179400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1530027/1666666 [2:22:23<11:16, 202.05it/s]

finished frames 9180000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1530136/1666666 [2:22:24<10:54, 208.66it/s]

finished frames 9180600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1530224/1666666 [2:22:24<10:49, 210.07it/s]

finished frames 9181200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1530334/1666666 [2:22:25<10:47, 210.71it/s]

finished frames 9181800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1530422/1666666 [2:22:25<10:50, 209.35it/s]

finished frames 9182400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1530532/1666666 [2:22:26<10:41, 212.32it/s]

finished frames 9183000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1530642/1666666 [2:22:26<10:41, 212.15it/s]

finished frames 9183600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1530730/1666666 [2:22:27<10:43, 211.36it/s]

finished frames 9184200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1530840/1666666 [2:22:27<10:41, 211.72it/s]

finished frames 9184800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1530928/1666666 [2:22:28<10:42, 211.16it/s]

finished frames 9185400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1531038/1666666 [2:22:28<10:54, 207.11it/s]

finished frames 9186000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1531126/1666666 [2:22:28<11:41, 193.31it/s]

finished frames 9186600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1531234/1666666 [2:22:29<11:51, 190.35it/s]

finished frames 9187200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1531322/1666666 [2:22:29<10:56, 206.05it/s]

finished frames 9187800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1531432/1666666 [2:22:30<10:41, 210.68it/s]

finished frames 9188400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1531542/1666666 [2:22:30<10:38, 211.63it/s]

finished frames 9189000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1531630/1666666 [2:22:31<10:38, 211.50it/s]

finished frames 9189600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1531740/1666666 [2:22:31<10:37, 211.71it/s]

finished frames 9190200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1531828/1666666 [2:22:32<10:40, 210.61it/s]

finished frames 9190800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1531938/1666666 [2:22:32<10:37, 211.26it/s]

finished frames 9191400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1532025/1666666 [2:22:33<10:54, 205.65it/s]

finished frames 9192000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1532131/1666666 [2:22:33<10:44, 208.66it/s]

finished frames 9192600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1532241/1666666 [2:22:34<10:37, 210.84it/s]

finished frames 9193200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1532329/1666666 [2:22:34<10:37, 210.68it/s]

finished frames 9193800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1532438/1666666 [2:22:35<10:40, 209.49it/s]

finished frames 9194400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1532522/1666666 [2:22:35<10:46, 207.40it/s]

finished frames 9195000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1532629/1666666 [2:22:36<10:41, 209.07it/s]

finished frames 9195600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1532735/1666666 [2:22:36<10:42, 208.35it/s]

finished frames 9196200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1532840/1666666 [2:22:37<10:43, 208.01it/s]

finished frames 9196800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1532924/1666666 [2:22:37<10:42, 208.01it/s]

finished frames 9197400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1533029/1666666 [2:22:38<10:59, 202.74it/s]

finished frames 9198000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1533134/1666666 [2:22:38<10:45, 206.85it/s]

finished frames 9198600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1533239/1666666 [2:22:39<10:41, 207.91it/s]

finished frames 9199200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1533323/1666666 [2:22:39<10:41, 207.79it/s]

finished frames 9199800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1533428/1666666 [2:22:40<10:42, 207.52it/s]

finished frames 9200400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1533536/1666666 [2:22:40<10:37, 208.92it/s]

finished frames 9201000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1533621/1666666 [2:22:40<10:38, 208.43it/s]

finished frames 9201600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1533731/1666666 [2:22:41<10:31, 210.47it/s]

finished frames 9202200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1533841/1666666 [2:22:41<10:33, 209.78it/s]

finished frames 9202800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1533926/1666666 [2:22:42<10:35, 208.91it/s]

finished frames 9203400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1534032/1666666 [2:22:42<10:50, 203.85it/s]

finished frames 9204000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1534138/1666666 [2:22:43<10:36, 208.10it/s]

finished frames 9204600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1534223/1666666 [2:22:43<10:36, 208.23it/s]

finished frames 9205200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1534330/1666666 [2:22:44<10:37, 207.55it/s]

finished frames 9205800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1534439/1666666 [2:22:44<10:26, 210.91it/s]

finished frames 9206400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1534526/1666666 [2:22:45<10:29, 209.84it/s]

finished frames 9207000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1534636/1666666 [2:22:45<10:25, 211.18it/s]

finished frames 9207600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1534724/1666666 [2:22:46<10:30, 209.31it/s]

finished frames 9208200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1534829/1666666 [2:22:46<10:32, 208.40it/s]

finished frames 9208800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1534935/1666666 [2:22:47<10:33, 207.95it/s]

finished frames 9209400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1535019/1666666 [2:22:47<10:56, 200.46it/s]

finished frames 9210000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1535129/1666666 [2:22:48<10:28, 209.44it/s]

finished frames 9210600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1535239/1666666 [2:22:48<10:25, 210.08it/s]

finished frames 9211200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1535327/1666666 [2:22:49<10:24, 210.42it/s]

finished frames 9211800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1535437/1666666 [2:22:49<10:22, 210.82it/s]

finished frames 9212400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1535525/1666666 [2:22:50<10:24, 210.15it/s]

finished frames 9213000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1535635/1666666 [2:22:50<10:24, 209.76it/s]

finished frames 9213600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1535722/1666666 [2:22:50<10:26, 209.02it/s]

finished frames 9214200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1535831/1666666 [2:22:51<10:25, 209.28it/s]

finished frames 9214800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1535915/1666666 [2:22:51<10:43, 203.18it/s]

finished frames 9215400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1536039/1666666 [2:22:52<11:13, 193.96it/s]

finished frames 9216000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1536123/1666666 [2:22:53<10:43, 202.98it/s]

finished frames 9216600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1536229/1666666 [2:22:53<10:26, 208.15it/s]

finished frames 9217200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1536334/1666666 [2:22:54<10:28, 207.23it/s]

finished frames 9217800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1536439/1666666 [2:22:54<10:31, 206.26it/s]

finished frames 9218400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1536523/1666666 [2:22:54<10:34, 205.13it/s]

finished frames 9219000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1536628/1666666 [2:22:55<10:31, 206.02it/s]

finished frames 9219600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1536733/1666666 [2:22:55<10:31, 205.79it/s]

finished frames 9220200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1536838/1666666 [2:22:56<10:26, 207.13it/s]

finished frames 9220800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1536922/1666666 [2:22:56<10:24, 207.91it/s]

finished frames 9221400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1537028/1666666 [2:22:57<10:41, 202.21it/s]

finished frames 9222000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1537133/1666666 [2:22:57<10:30, 205.52it/s]

finished frames 9222600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1537239/1666666 [2:22:58<10:23, 207.73it/s]

finished frames 9223200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1537323/1666666 [2:22:58<10:25, 206.63it/s]

finished frames 9223800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1537428/1666666 [2:22:59<10:25, 206.78it/s]

finished frames 9224400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1537533/1666666 [2:22:59<10:22, 207.38it/s]

finished frames 9225000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1537638/1666666 [2:23:00<10:21, 207.55it/s]

finished frames 9225600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1537722/1666666 [2:23:00<10:21, 207.48it/s]

finished frames 9226200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1537827/1666666 [2:23:01<10:22, 206.88it/s]

finished frames 9226800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1537932/1666666 [2:23:01<10:19, 207.72it/s]

finished frames 9227400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1538037/1666666 [2:23:02<10:37, 201.80it/s]

finished frames 9228000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1538122/1666666 [2:23:02<10:19, 207.49it/s]

finished frames 9228600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1538230/1666666 [2:23:03<10:35, 202.15it/s]

finished frames 9229200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1538338/1666666 [2:23:03<10:26, 204.86it/s]

finished frames 9229800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1538424/1666666 [2:23:04<10:14, 208.72it/s]

finished frames 9230400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1538534/1666666 [2:23:04<10:08, 210.57it/s]

finished frames 9231000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1538622/1666666 [2:23:05<10:08, 210.57it/s]

finished frames 9231600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1538732/1666666 [2:23:05<10:04, 211.75it/s]

finished frames 9232200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1538842/1666666 [2:23:06<10:04, 211.44it/s]

finished frames 9232800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1538930/1666666 [2:23:06<10:06, 210.77it/s]

finished frames 9233400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1539040/1666666 [2:23:07<10:16, 207.07it/s]

finished frames 9234000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1539128/1666666 [2:23:07<10:04, 210.91it/s]

finished frames 9234600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1539238/1666666 [2:23:08<10:01, 211.74it/s]

finished frames 9235200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1539326/1666666 [2:23:08<10:03, 210.98it/s]

finished frames 9235800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1539436/1666666 [2:23:08<10:03, 210.85it/s]

finished frames 9236400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1539524/1666666 [2:23:09<10:01, 211.32it/s]

finished frames 9237000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1539634/1666666 [2:23:09<09:58, 212.16it/s]

finished frames 9237600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1539722/1666666 [2:23:10<09:55, 213.14it/s]

finished frames 9238200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1539833/1666666 [2:23:10<09:48, 215.59it/s]

finished frames 9238800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1539943/1666666 [2:23:11<09:50, 214.56it/s]

finished frames 9239400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1540031/1666666 [2:23:11<10:05, 209.08it/s]

finished frames 9240000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1540141/1666666 [2:23:12<09:55, 212.38it/s]

finished frames 9240600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1540229/1666666 [2:23:12<09:54, 212.61it/s]

finished frames 9241200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1540339/1666666 [2:23:13<09:51, 213.69it/s]

finished frames 9241800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1540427/1666666 [2:23:13<09:52, 212.93it/s]

finished frames 9242400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1540537/1666666 [2:23:14<10:28, 200.73it/s]

finished frames 9243000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1540623/1666666 [2:23:14<11:38, 180.43it/s]

finished frames 9243600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1540729/1666666 [2:23:15<10:17, 203.97it/s]

finished frames 9244200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1540836/1666666 [2:23:15<10:05, 207.79it/s]

finished frames 9244800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1540941/1666666 [2:23:16<10:07, 206.96it/s]

finished frames 9245400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1541027/1666666 [2:23:16<10:16, 203.65it/s]

finished frames 9246000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1541132/1666666 [2:23:17<10:12, 205.01it/s]

finished frames 9246600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1541237/1666666 [2:23:17<10:12, 204.78it/s]

finished frames 9247200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1541342/1666666 [2:23:18<10:09, 205.67it/s]

finished frames 9247800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1541426/1666666 [2:23:18<10:10, 205.27it/s]

finished frames 9248400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1541531/1666666 [2:23:19<10:06, 206.25it/s]

finished frames 9249000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 92%|█████████▏| 1541636/1666666 [2:23:19<10:02, 207.45it/s]

finished frames 9249600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1541741/1666666 [2:23:20<10:08, 205.19it/s]

finished frames 9250200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1541825/1666666 [2:23:20<10:09, 204.77it/s]

finished frames 9250800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1541930/1666666 [2:23:20<10:07, 205.21it/s]

finished frames 9251400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1542035/1666666 [2:23:21<10:22, 200.37it/s]

finished frames 9252000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1542141/1666666 [2:23:22<10:04, 205.96it/s]

finished frames 9252600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1542227/1666666 [2:23:22<09:54, 209.23it/s]

finished frames 9253200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1542337/1666666 [2:23:22<09:49, 211.05it/s]

finished frames 9253800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1542425/1666666 [2:23:23<09:47, 211.36it/s]

finished frames 9254400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1542535/1666666 [2:23:23<09:47, 211.29it/s]

finished frames 9255000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1542623/1666666 [2:23:24<09:45, 211.85it/s]

finished frames 9255600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1542733/1666666 [2:23:24<09:45, 211.66it/s]

finished frames 9256200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1542843/1666666 [2:23:25<09:44, 211.85it/s]

finished frames 9256800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1542931/1666666 [2:23:25<09:46, 211.03it/s]

finished frames 9257400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1543039/1666666 [2:23:26<10:08, 203.06it/s]

finished frames 9258000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1543126/1666666 [2:23:26<09:53, 208.09it/s]

finished frames 9258600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1543233/1666666 [2:23:27<09:49, 209.43it/s]

finished frames 9259200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1543343/1666666 [2:23:27<09:46, 210.31it/s]

finished frames 9259800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1543431/1666666 [2:23:28<09:45, 210.39it/s]

finished frames 9260400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1543541/1666666 [2:23:28<09:45, 210.26it/s]

finished frames 9261000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1543629/1666666 [2:23:29<09:45, 210.01it/s]

finished frames 9261600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1543739/1666666 [2:23:29<09:44, 210.46it/s]

finished frames 9262200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1543827/1666666 [2:23:30<09:44, 210.20it/s]

finished frames 9262800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1543937/1666666 [2:23:30<09:42, 210.54it/s]

finished frames 9263400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1544024/1666666 [2:23:31<09:56, 205.63it/s]

finished frames 9264000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1544132/1666666 [2:23:31<09:46, 209.03it/s]

finished frames 9264600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1544240/1666666 [2:23:32<09:42, 210.32it/s]

finished frames 9265200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1544328/1666666 [2:23:32<09:43, 209.54it/s]

finished frames 9265800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1544438/1666666 [2:23:32<09:41, 210.35it/s]

finished frames 9266400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1544526/1666666 [2:23:33<09:40, 210.37it/s]

finished frames 9267000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1544636/1666666 [2:23:33<09:37, 211.44it/s]

finished frames 9267600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1544724/1666666 [2:23:34<09:36, 211.70it/s]

finished frames 9268200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1544834/1666666 [2:23:34<09:35, 211.75it/s]

finished frames 9268800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1544922/1666666 [2:23:35<09:32, 212.53it/s]

finished frames 9269400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1545032/1666666 [2:23:35<09:44, 208.27it/s]

finished frames 9270000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1545142/1666666 [2:23:36<09:34, 211.37it/s]

finished frames 9270600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1545230/1666666 [2:23:36<10:06, 200.08it/s]

finished frames 9271200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1545317/1666666 [2:23:37<10:32, 191.73it/s]

finished frames 9271800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1545427/1666666 [2:23:37<09:40, 208.82it/s]

finished frames 9272400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1545537/1666666 [2:23:38<09:36, 210.19it/s]

finished frames 9273000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1545624/1666666 [2:23:38<09:37, 209.62it/s]

finished frames 9273600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1545733/1666666 [2:23:39<09:37, 209.51it/s]

finished frames 9274200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1545841/1666666 [2:23:39<09:35, 210.13it/s]

finished frames 9274800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1545928/1666666 [2:23:40<09:37, 209.02it/s]

finished frames 9275400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1546036/1666666 [2:23:40<09:49, 204.78it/s]

finished frames 9276000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1546122/1666666 [2:23:41<09:40, 207.72it/s]

finished frames 9276600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1546228/1666666 [2:23:41<09:38, 208.15it/s]

finished frames 9277200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1546334/1666666 [2:23:42<09:36, 208.72it/s]

finished frames 9277800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1546439/1666666 [2:23:42<09:36, 208.73it/s]

finished frames 9278400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1546524/1666666 [2:23:42<09:36, 208.58it/s]

finished frames 9279000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1546633/1666666 [2:23:43<09:31, 210.01it/s]

finished frames 9279600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1546743/1666666 [2:23:44<09:27, 211.23it/s]

finished frames 9280200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1546831/1666666 [2:23:44<09:30, 209.92it/s]

finished frames 9280800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1546940/1666666 [2:23:44<09:29, 210.13it/s]

finished frames 9281400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1547027/1666666 [2:23:45<09:44, 204.80it/s]

finished frames 9282000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1547133/1666666 [2:23:45<09:32, 208.62it/s]

finished frames 9282600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1547240/1666666 [2:23:46<09:29, 209.69it/s]

finished frames 9283200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1547328/1666666 [2:23:46<09:28, 209.98it/s]

finished frames 9283800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1547437/1666666 [2:23:47<09:29, 209.20it/s]

finished frames 9284400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1547522/1666666 [2:23:47<09:30, 208.77it/s]

finished frames 9285000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1547628/1666666 [2:23:48<09:41, 204.74it/s]

finished frames 9285600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1547735/1666666 [2:23:48<09:45, 203.17it/s]

finished frames 9286200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1547840/1666666 [2:23:49<09:33, 207.33it/s]

finished frames 9286800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1547924/1666666 [2:23:49<09:33, 207.09it/s]

finished frames 9287400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1548029/1666666 [2:23:50<09:49, 201.12it/s]

finished frames 9288000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1548134/1666666 [2:23:50<09:34, 206.27it/s]

finished frames 9288600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1548239/1666666 [2:23:51<09:30, 207.59it/s]

finished frames 9289200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1548323/1666666 [2:23:51<09:28, 208.06it/s]

finished frames 9289800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1548429/1666666 [2:23:52<09:25, 209.18it/s]

finished frames 9290400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1548534/1666666 [2:23:52<09:26, 208.63it/s]

finished frames 9291000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1548639/1666666 [2:23:53<09:30, 207.00it/s]

finished frames 9291600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1548724/1666666 [2:23:53<09:31, 206.40it/s]

finished frames 9292200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1548832/1666666 [2:23:54<09:18, 210.87it/s]

finished frames 9292800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1548942/1666666 [2:23:54<09:15, 211.87it/s]

finished frames 9293400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1549029/1666666 [2:23:55<09:26, 207.67it/s]

finished frames 9294000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1549135/1666666 [2:23:55<09:23, 208.61it/s]

finished frames 9294600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1549244/1666666 [2:23:56<09:10, 213.11it/s]

finished frames 9295200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1549332/1666666 [2:23:56<09:06, 214.69it/s]

finished frames 9295800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1549442/1666666 [2:23:56<09:04, 215.41it/s]

finished frames 9296400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1549530/1666666 [2:23:57<09:01, 216.31it/s]

finished frames 9297000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1549640/1666666 [2:23:57<09:00, 216.39it/s]

finished frames 9297600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1549728/1666666 [2:23:58<09:00, 216.39it/s]

finished frames 9298200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1549838/1666666 [2:23:58<08:57, 217.49it/s]

finished frames 9298800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1549926/1666666 [2:23:59<09:08, 212.86it/s]

finished frames 9299400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1550035/1666666 [2:23:59<10:13, 190.15it/s]

finished frames 9300000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1550143/1666666 [2:24:00<09:21, 207.36it/s]

finished frames 9300600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1550231/1666666 [2:24:00<09:00, 215.51it/s]

finished frames 9301200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1550341/1666666 [2:24:01<08:52, 218.46it/s]

finished frames 9301800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1550429/1666666 [2:24:01<08:50, 219.12it/s]

finished frames 9302400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1550539/1666666 [2:24:02<09:03, 213.71it/s]

finished frames 9303000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1550627/1666666 [2:24:02<09:02, 213.89it/s]

finished frames 9303600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1550737/1666666 [2:24:03<09:10, 210.64it/s]

finished frames 9304200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1550825/1666666 [2:24:03<09:12, 209.68it/s]

finished frames 9304800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1550931/1666666 [2:24:03<09:23, 205.27it/s]

finished frames 9305400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1551036/1666666 [2:24:04<09:34, 201.27it/s]

finished frames 9306000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1551141/1666666 [2:24:05<09:21, 205.83it/s]

finished frames 9306600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1551225/1666666 [2:24:05<09:20, 206.12it/s]

finished frames 9307200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1551330/1666666 [2:24:05<09:18, 206.63it/s]

finished frames 9307800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1551435/1666666 [2:24:06<09:17, 206.81it/s]

finished frames 9308400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1551540/1666666 [2:24:06<09:15, 207.31it/s]

finished frames 9309000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1551624/1666666 [2:24:07<09:16, 206.90it/s]

finished frames 9309600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1551729/1666666 [2:24:07<09:12, 208.06it/s]

finished frames 9310200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1551834/1666666 [2:24:08<09:13, 207.42it/s]

finished frames 9310800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1551939/1666666 [2:24:08<09:11, 208.12it/s]

finished frames 9311400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1552023/1666666 [2:24:09<09:26, 202.47it/s]

finished frames 9312000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1552128/1666666 [2:24:09<09:16, 205.97it/s]

finished frames 9312600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1552233/1666666 [2:24:10<09:12, 207.13it/s]

finished frames 9313200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1552338/1666666 [2:24:10<09:22, 203.19it/s]

finished frames 9313800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1552422/1666666 [2:24:11<09:38, 197.48it/s]

finished frames 9314400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1552529/1666666 [2:24:11<09:10, 207.46it/s]

finished frames 9315000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1552634/1666666 [2:24:12<09:13, 206.13it/s]

finished frames 9315600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1552739/1666666 [2:24:12<09:11, 206.71it/s]

finished frames 9316200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1552823/1666666 [2:24:13<09:11, 206.38it/s]

finished frames 9316800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1552928/1666666 [2:24:13<09:15, 204.87it/s]

finished frames 9317400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1553032/1666666 [2:24:14<09:30, 199.01it/s]

finished frames 9318000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1553137/1666666 [2:24:14<09:15, 204.46it/s]

finished frames 9318600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1553242/1666666 [2:24:15<09:09, 206.26it/s]

finished frames 9319200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1553326/1666666 [2:24:15<09:12, 205.07it/s]

finished frames 9319800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1553431/1666666 [2:24:16<09:09, 205.99it/s]

finished frames 9320400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1553536/1666666 [2:24:16<09:04, 207.79it/s]

finished frames 9321000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1553641/1666666 [2:24:17<09:02, 208.28it/s]

finished frames 9321600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1553725/1666666 [2:24:17<09:05, 207.12it/s]

finished frames 9322200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1553830/1666666 [2:24:18<09:02, 207.87it/s]

finished frames 9322800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1553935/1666666 [2:24:18<09:01, 208.37it/s]

finished frames 9323400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1554019/1666666 [2:24:19<09:22, 200.18it/s]

finished frames 9324000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1554125/1666666 [2:24:19<09:02, 207.38it/s]

finished frames 9324600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1554233/1666666 [2:24:20<08:52, 211.20it/s]

finished frames 9325200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1554343/1666666 [2:24:20<08:49, 212.26it/s]

finished frames 9325800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1554431/1666666 [2:24:20<08:47, 212.78it/s]

finished frames 9326400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1554541/1666666 [2:24:21<08:49, 211.69it/s]

finished frames 9327000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1554629/1666666 [2:24:21<09:13, 202.49it/s]

finished frames 9327600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1554716/1666666 [2:24:22<09:43, 191.92it/s]

finished frames 9328200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1554825/1666666 [2:24:22<08:56, 208.66it/s]

finished frames 9328800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1554935/1666666 [2:24:23<08:49, 210.98it/s]

finished frames 9329400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1555022/1666666 [2:24:23<09:15, 200.98it/s]

finished frames 9330000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1555132/1666666 [2:24:24<08:50, 210.21it/s]

finished frames 9330600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1555242/1666666 [2:24:24<08:46, 211.68it/s]

finished frames 9331200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1555330/1666666 [2:24:25<08:47, 211.17it/s]

finished frames 9331800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1555440/1666666 [2:24:25<08:46, 211.30it/s]

finished frames 9332400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1555528/1666666 [2:24:26<08:47, 210.80it/s]

finished frames 9333000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1555638/1666666 [2:24:26<08:45, 211.36it/s]

finished frames 9333600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1555726/1666666 [2:24:27<08:45, 211.07it/s]

finished frames 9334200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1555836/1666666 [2:24:27<08:45, 210.82it/s]

finished frames 9334800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1555924/1666666 [2:24:28<08:46, 210.35it/s]

finished frames 9335400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1556033/1666666 [2:24:28<08:54, 206.99it/s]

finished frames 9336000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1556143/1666666 [2:24:29<08:46, 210.08it/s]

finished frames 9336600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1556231/1666666 [2:24:29<08:45, 210.03it/s]

finished frames 9337200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1556341/1666666 [2:24:30<08:43, 210.58it/s]

finished frames 9337800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1556428/1666666 [2:24:30<08:48, 208.70it/s]

finished frames 9338400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1556533/1666666 [2:24:30<08:53, 206.52it/s]

finished frames 9339000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1556638/1666666 [2:24:31<08:52, 206.57it/s]

finished frames 9339600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1556722/1666666 [2:24:31<08:53, 205.96it/s]

finished frames 9340200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1556827/1666666 [2:24:32<08:54, 205.60it/s]

finished frames 9340800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1556932/1666666 [2:24:32<08:52, 206.27it/s]

finished frames 9341400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1557037/1666666 [2:24:33<09:03, 201.54it/s]

finished frames 9342000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1557142/1666666 [2:24:33<09:04, 201.32it/s]

finished frames 9342600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1557226/1666666 [2:24:34<08:57, 203.61it/s]

finished frames 9343200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1557331/1666666 [2:24:34<08:50, 206.06it/s]

finished frames 9343800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1557437/1666666 [2:24:35<08:44, 208.21it/s]

finished frames 9344400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1557525/1666666 [2:24:35<08:40, 209.77it/s]

finished frames 9345000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1557635/1666666 [2:24:36<08:38, 210.17it/s]

finished frames 9345600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1557723/1666666 [2:24:36<08:37, 210.49it/s]

finished frames 9346200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1557833/1666666 [2:24:37<08:37, 210.29it/s]

finished frames 9346800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1557943/1666666 [2:24:37<08:36, 210.64it/s]

finished frames 9347400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1558031/1666666 [2:24:38<08:43, 207.59it/s]

finished frames 9348000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1558141/1666666 [2:24:38<08:33, 211.39it/s]

finished frames 9348600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 93%|█████████▎| 1558229/1666666 [2:24:39<08:32, 211.79it/s]

finished frames 9349200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▎| 1558339/1666666 [2:24:39<08:29, 212.51it/s]

finished frames 9349800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▎| 1558427/1666666 [2:24:40<08:28, 212.72it/s]

finished frames 9350400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▎| 1558537/1666666 [2:24:40<08:26, 213.39it/s]

finished frames 9351000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▎| 1558625/1666666 [2:24:41<08:28, 212.60it/s]

finished frames 9351600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▎| 1558735/1666666 [2:24:41<08:25, 213.55it/s]

finished frames 9352200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▎| 1558823/1666666 [2:24:41<08:27, 212.44it/s]

finished frames 9352800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▎| 1558933/1666666 [2:24:42<08:23, 214.18it/s]

finished frames 9353400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▎| 1559021/1666666 [2:24:42<08:43, 205.78it/s]

finished frames 9354000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▎| 1559126/1666666 [2:24:43<08:40, 206.56it/s]

finished frames 9354600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▎| 1559231/1666666 [2:24:43<08:42, 205.66it/s]

finished frames 9355200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▎| 1559341/1666666 [2:24:44<09:08, 195.83it/s]

finished frames 9355800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▎| 1559428/1666666 [2:24:44<10:04, 177.31it/s]

finished frames 9356400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▎| 1559537/1666666 [2:24:45<08:40, 205.63it/s]

finished frames 9357000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▎| 1559625/1666666 [2:24:45<08:28, 210.70it/s]

finished frames 9357600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▎| 1559735/1666666 [2:24:46<08:26, 211.16it/s]

finished frames 9358200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▎| 1559823/1666666 [2:24:46<08:25, 211.16it/s]

finished frames 9358800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▎| 1559931/1666666 [2:24:47<08:39, 205.59it/s]

finished frames 9359400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▎| 1560040/1666666 [2:24:47<08:40, 204.96it/s]

finished frames 9360000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▎| 1560125/1666666 [2:24:48<08:34, 207.23it/s]

finished frames 9360600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▎| 1560231/1666666 [2:24:48<08:33, 207.31it/s]

finished frames 9361200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▎| 1560341/1666666 [2:24:49<08:22, 211.52it/s]

finished frames 9361800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▎| 1560429/1666666 [2:24:49<08:20, 212.37it/s]

finished frames 9362400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▎| 1560539/1666666 [2:24:50<08:20, 212.13it/s]

finished frames 9363000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▎| 1560627/1666666 [2:24:50<08:17, 213.18it/s]

finished frames 9363600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▎| 1560737/1666666 [2:24:51<08:15, 213.73it/s]

finished frames 9364200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▎| 1560825/1666666 [2:24:51<08:18, 212.36it/s]

finished frames 9364800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▎| 1560935/1666666 [2:24:52<08:15, 213.31it/s]

finished frames 9365400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▎| 1561022/1666666 [2:24:52<08:29, 207.16it/s]

finished frames 9366000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▎| 1561132/1666666 [2:24:53<08:17, 212.18it/s]

finished frames 9366600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▎| 1561242/1666666 [2:24:53<08:14, 213.07it/s]

finished frames 9367200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▎| 1561330/1666666 [2:24:53<08:14, 212.89it/s]

finished frames 9367800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▎| 1561440/1666666 [2:24:54<08:14, 212.91it/s]

finished frames 9368400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▎| 1561528/1666666 [2:24:54<08:14, 212.64it/s]

finished frames 9369000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▎| 1561638/1666666 [2:24:55<08:16, 211.44it/s]

finished frames 9369600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▎| 1561725/1666666 [2:24:55<08:29, 206.16it/s]

finished frames 9370200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▎| 1561835/1666666 [2:24:56<08:16, 211.03it/s]

finished frames 9370800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▎| 1561923/1666666 [2:24:56<08:14, 212.02it/s]

finished frames 9371400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▎| 1562032/1666666 [2:24:57<08:22, 208.18it/s]

finished frames 9372000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▎| 1562137/1666666 [2:24:57<08:28, 205.61it/s]

finished frames 9372600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▎| 1562242/1666666 [2:24:58<08:29, 204.89it/s]

finished frames 9373200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▎| 1562326/1666666 [2:24:58<08:28, 205.25it/s]

finished frames 9373800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▎| 1562431/1666666 [2:24:59<08:28, 204.82it/s]

finished frames 9374400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1562536/1666666 [2:24:59<08:26, 205.69it/s]

finished frames 9375000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1562641/1666666 [2:25:00<08:28, 204.73it/s]

finished frames 9375600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1562725/1666666 [2:25:00<08:29, 204.02it/s]

finished frames 9376200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1562830/1666666 [2:25:01<08:27, 204.77it/s]

finished frames 9376800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1562935/1666666 [2:25:01<08:25, 205.07it/s]

finished frames 9377400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1563019/1666666 [2:25:02<08:42, 198.21it/s]

finished frames 9378000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1563127/1666666 [2:25:02<08:15, 209.08it/s]

finished frames 9378600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1563237/1666666 [2:25:03<08:07, 212.32it/s]

finished frames 9379200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1563325/1666666 [2:25:03<08:07, 212.03it/s]

finished frames 9379800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1563435/1666666 [2:25:04<08:06, 212.28it/s]

finished frames 9380400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1563523/1666666 [2:25:04<08:05, 212.61it/s]

finished frames 9381000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1563633/1666666 [2:25:05<08:05, 212.39it/s]

finished frames 9381600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1563743/1666666 [2:25:05<08:06, 211.44it/s]

finished frames 9382200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1563831/1666666 [2:25:05<08:07, 210.84it/s]

finished frames 9382800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1563941/1666666 [2:25:06<08:07, 210.92it/s]

finished frames 9383400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1564028/1666666 [2:25:06<08:18, 205.85it/s]

finished frames 9384000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1564134/1666666 [2:25:07<08:54, 191.98it/s]

finished frames 9384600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1564242/1666666 [2:25:07<08:15, 206.87it/s]

finished frames 9385200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1564329/1666666 [2:25:08<08:09, 209.13it/s]

finished frames 9385800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1564434/1666666 [2:25:08<08:11, 207.92it/s]

finished frames 9386400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1564540/1666666 [2:25:09<08:10, 208.20it/s]

finished frames 9387000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1564624/1666666 [2:25:09<08:09, 208.39it/s]

finished frames 9387600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1564730/1666666 [2:25:10<08:09, 208.23it/s]

finished frames 9388200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1564836/1666666 [2:25:10<08:08, 208.25it/s]

finished frames 9388800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1564942/1666666 [2:25:11<08:06, 209.12it/s]

finished frames 9389400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1565026/1666666 [2:25:11<08:34, 197.38it/s]

finished frames 9390000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1565131/1666666 [2:25:12<08:12, 206.30it/s]

finished frames 9390600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1565237/1666666 [2:25:12<08:05, 208.72it/s]

finished frames 9391200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1565323/1666666 [2:25:13<08:03, 209.61it/s]

finished frames 9391800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1565433/1666666 [2:25:13<07:58, 211.68it/s]

finished frames 9392400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1565543/1666666 [2:25:14<07:57, 211.95it/s]

finished frames 9393000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1565631/1666666 [2:25:14<07:58, 211.21it/s]

finished frames 9393600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1565741/1666666 [2:25:15<07:55, 212.45it/s]

finished frames 9394200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1565829/1666666 [2:25:15<07:54, 212.67it/s]

finished frames 9394800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1565938/1666666 [2:25:16<07:56, 211.37it/s]

finished frames 9395400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1566025/1666666 [2:25:16<08:22, 200.12it/s]

finished frames 9396000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1566130/1666666 [2:25:17<08:11, 204.75it/s]

finished frames 9396600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1566235/1666666 [2:25:17<08:07, 206.06it/s]

finished frames 9397200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1566340/1666666 [2:25:18<08:04, 206.99it/s]

finished frames 9397800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1566428/1666666 [2:25:18<07:56, 210.21it/s]

finished frames 9398400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1566538/1666666 [2:25:18<07:53, 211.28it/s]

finished frames 9399000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1566626/1666666 [2:25:19<07:52, 211.57it/s]

finished frames 9399600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1566736/1666666 [2:25:19<07:54, 210.50it/s]

finished frames 9400200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1566824/1666666 [2:25:20<07:53, 210.97it/s]

finished frames 9400800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1566934/1666666 [2:25:20<07:49, 212.51it/s]

finished frames 9401400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1567022/1666666 [2:25:21<08:05, 205.30it/s]

finished frames 9402000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1567132/1666666 [2:25:21<07:50, 211.41it/s]

finished frames 9402600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1567242/1666666 [2:25:22<07:49, 211.94it/s]

finished frames 9403200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1567330/1666666 [2:25:22<07:49, 211.75it/s]

finished frames 9403800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1567440/1666666 [2:25:23<07:51, 210.41it/s]

finished frames 9404400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1567525/1666666 [2:25:23<07:59, 206.87it/s]

finished frames 9405000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1567630/1666666 [2:25:24<08:00, 206.04it/s]

finished frames 9405600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1567735/1666666 [2:25:24<07:59, 206.47it/s]

finished frames 9406200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1567840/1666666 [2:25:25<07:57, 207.00it/s]

finished frames 9406800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1567924/1666666 [2:25:25<07:59, 205.86it/s]

finished frames 9407400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1568029/1666666 [2:25:26<08:11, 200.88it/s]

finished frames 9408000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1568134/1666666 [2:25:26<07:58, 205.71it/s]

finished frames 9408600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1568239/1666666 [2:25:27<07:56, 206.36it/s]

finished frames 9409200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1568323/1666666 [2:25:27<07:58, 205.61it/s]

finished frames 9409800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1568428/1666666 [2:25:28<07:53, 207.61it/s]

finished frames 9410400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1568533/1666666 [2:25:28<07:55, 206.48it/s]

finished frames 9411000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1568638/1666666 [2:25:29<07:52, 207.47it/s]

finished frames 9411600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1568724/1666666 [2:25:29<08:35, 190.09it/s]

finished frames 9412200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1568834/1666666 [2:25:30<07:49, 208.37it/s]

finished frames 9412800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1568922/1666666 [2:25:30<07:45, 209.89it/s]

finished frames 9413400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1569032/1666666 [2:25:30<07:49, 208.14it/s]

finished frames 9414000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1569142/1666666 [2:25:31<07:42, 210.88it/s]

finished frames 9414600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1569230/1666666 [2:25:31<07:41, 211.16it/s]

finished frames 9415200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1569340/1666666 [2:25:32<07:39, 211.69it/s]

finished frames 9415800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1569428/1666666 [2:25:32<07:40, 211.36it/s]

finished frames 9416400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1569538/1666666 [2:25:33<07:40, 211.13it/s]

finished frames 9417000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1569626/1666666 [2:25:33<07:40, 210.52it/s]

finished frames 9417600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1569736/1666666 [2:25:34<07:40, 210.64it/s]

finished frames 9418200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1569824/1666666 [2:25:34<07:40, 210.25it/s]

finished frames 9418800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1569934/1666666 [2:25:35<07:38, 210.92it/s]

finished frames 9419400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1570022/1666666 [2:25:35<07:51, 205.08it/s]

finished frames 9420000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1570132/1666666 [2:25:36<07:39, 210.30it/s]

finished frames 9420600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1570242/1666666 [2:25:36<07:34, 212.12it/s]

finished frames 9421200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1570330/1666666 [2:25:37<07:35, 211.60it/s]

finished frames 9421800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1570440/1666666 [2:25:37<07:34, 211.85it/s]

finished frames 9422400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1570528/1666666 [2:25:38<07:34, 211.60it/s]

finished frames 9423000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1570638/1666666 [2:25:38<07:33, 211.70it/s]

finished frames 9423600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1570726/1666666 [2:25:38<07:33, 211.65it/s]

finished frames 9424200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1570836/1666666 [2:25:39<07:33, 211.35it/s]

finished frames 9424800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1570924/1666666 [2:25:39<07:32, 211.69it/s]

finished frames 9425400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1571033/1666666 [2:25:40<07:41, 207.00it/s]

finished frames 9426000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1571141/1666666 [2:25:40<07:46, 204.74it/s]

finished frames 9426600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1571227/1666666 [2:25:41<08:03, 197.31it/s]

finished frames 9427200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1571337/1666666 [2:25:41<07:33, 210.35it/s]

finished frames 9427800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1571425/1666666 [2:25:42<07:31, 211.07it/s]

finished frames 9428400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1571535/1666666 [2:25:42<07:28, 212.11it/s]

finished frames 9429000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1571623/1666666 [2:25:43<07:29, 211.24it/s]

finished frames 9429600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1571733/1666666 [2:25:43<07:25, 213.28it/s]

finished frames 9430200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1571843/1666666 [2:25:44<07:25, 212.97it/s]

finished frames 9430800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1571931/1666666 [2:25:44<07:27, 211.59it/s]

finished frames 9431400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1572041/1666666 [2:25:45<07:37, 207.06it/s]

finished frames 9432000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1572129/1666666 [2:25:45<07:29, 210.41it/s]

finished frames 9432600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1572239/1666666 [2:25:46<07:25, 212.12it/s]

finished frames 9433200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1572327/1666666 [2:25:46<07:24, 212.03it/s]

finished frames 9433800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1572437/1666666 [2:25:47<07:26, 211.15it/s]

finished frames 9434400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1572525/1666666 [2:25:47<07:25, 211.36it/s]

finished frames 9435000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1572635/1666666 [2:25:48<07:24, 211.78it/s]

finished frames 9435600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1572723/1666666 [2:25:48<07:19, 213.58it/s]

finished frames 9436200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1572833/1666666 [2:25:48<07:16, 214.77it/s]

finished frames 9436800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1572943/1666666 [2:25:49<07:19, 213.23it/s]

finished frames 9437400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1573030/1666666 [2:25:49<07:34, 206.08it/s]

finished frames 9438000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1573139/1666666 [2:25:50<07:22, 211.14it/s]

finished frames 9438600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1573227/1666666 [2:25:50<07:24, 210.41it/s]

finished frames 9439200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1573337/1666666 [2:25:51<07:20, 211.86it/s]

finished frames 9439800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1573425/1666666 [2:25:51<07:21, 211.05it/s]

finished frames 9440400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1573535/1666666 [2:25:52<07:20, 211.35it/s]

finished frames 9441000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1573643/1666666 [2:25:52<07:32, 205.55it/s]

finished frames 9441600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1573731/1666666 [2:25:53<07:20, 210.89it/s]

finished frames 9442200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1573841/1666666 [2:25:53<07:24, 208.91it/s]

finished frames 9442800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1573929/1666666 [2:25:54<07:12, 214.60it/s]

finished frames 9443400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1574038/1666666 [2:25:54<07:28, 206.59it/s]

finished frames 9444000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1574123/1666666 [2:25:55<07:24, 208.19it/s]

finished frames 9444600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1574228/1666666 [2:25:55<07:25, 207.53it/s]

finished frames 9445200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1574337/1666666 [2:25:56<07:15, 211.97it/s]

finished frames 9445800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1574425/1666666 [2:25:56<07:11, 213.93it/s]

finished frames 9446400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1574535/1666666 [2:25:57<07:09, 214.63it/s]

finished frames 9447000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1574623/1666666 [2:25:57<07:08, 214.62it/s]

finished frames 9447600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1574733/1666666 [2:25:57<07:08, 214.49it/s]

finished frames 9448200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1574843/1666666 [2:25:58<07:11, 212.78it/s]

finished frames 9448800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 94%|█████████▍| 1574931/1666666 [2:25:58<07:10, 212.95it/s]

finished frames 9449400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1575040/1666666 [2:25:59<07:19, 208.38it/s]

finished frames 9450000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1575125/1666666 [2:25:59<07:18, 208.59it/s]

finished frames 9450600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1575233/1666666 [2:26:00<07:13, 210.92it/s]

finished frames 9451200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1575343/1666666 [2:26:00<06:59, 217.58it/s]

finished frames 9451800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1575431/1666666 [2:26:01<06:56, 218.85it/s]

finished frames 9452400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1575541/1666666 [2:26:01<06:55, 219.12it/s]

finished frames 9453000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1575629/1666666 [2:26:02<07:09, 212.09it/s]

finished frames 9453600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1575739/1666666 [2:26:02<07:11, 210.50it/s]

finished frames 9454200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1575826/1666666 [2:26:03<07:13, 209.48it/s]

finished frames 9454800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1575935/1666666 [2:26:03<07:15, 208.54it/s]

finished frames 9455400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1576019/1666666 [2:26:04<07:29, 201.47it/s]

finished frames 9456000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1576124/1666666 [2:26:04<07:21, 204.98it/s]

finished frames 9456600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1576229/1666666 [2:26:05<07:18, 206.06it/s]

finished frames 9457200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1576334/1666666 [2:26:05<07:15, 207.48it/s]

finished frames 9457800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1576439/1666666 [2:26:06<07:15, 207.35it/s]

finished frames 9458400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1576523/1666666 [2:26:06<07:15, 206.87it/s]

finished frames 9459000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1576629/1666666 [2:26:07<07:12, 208.00it/s]

finished frames 9459600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1576734/1666666 [2:26:07<07:13, 207.64it/s]

finished frames 9460200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1576839/1666666 [2:26:08<07:13, 207.43it/s]

finished frames 9460800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1576923/1666666 [2:26:08<07:12, 207.71it/s]

finished frames 9461400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1577028/1666666 [2:26:08<07:22, 202.64it/s]

finished frames 9462000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1577133/1666666 [2:26:09<07:13, 206.49it/s]

finished frames 9462600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1577238/1666666 [2:26:09<07:11, 207.12it/s]

finished frames 9463200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1577322/1666666 [2:26:10<07:12, 206.43it/s]

finished frames 9463800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1577428/1666666 [2:26:10<07:09, 207.70it/s]

finished frames 9464400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1577533/1666666 [2:26:11<07:09, 207.32it/s]

finished frames 9465000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1577638/1666666 [2:26:11<07:10, 206.79it/s]

finished frames 9465600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1577722/1666666 [2:26:12<07:08, 207.49it/s]

finished frames 9466200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1577829/1666666 [2:26:12<07:05, 208.87it/s]

finished frames 9466800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1577936/1666666 [2:26:13<07:03, 209.63it/s]

finished frames 9467400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1578022/1666666 [2:26:13<07:12, 205.13it/s]

finished frames 9468000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1578130/1666666 [2:26:14<07:01, 210.04it/s]

finished frames 9468600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1578240/1666666 [2:26:14<07:00, 210.44it/s]

finished frames 9469200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1578327/1666666 [2:26:15<07:22, 199.44it/s]

finished frames 9469800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1578437/1666666 [2:26:15<06:58, 210.86it/s]

finished frames 9470400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1578525/1666666 [2:26:16<06:55, 212.18it/s]

finished frames 9471000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1578635/1666666 [2:26:16<06:54, 212.58it/s]

finished frames 9471600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1578723/1666666 [2:26:17<06:52, 213.13it/s]

finished frames 9472200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1578833/1666666 [2:26:17<06:49, 214.65it/s]

finished frames 9472800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1578943/1666666 [2:26:18<06:57, 210.29it/s]

finished frames 9473400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1579028/1666666 [2:26:18<07:10, 203.79it/s]

finished frames 9474000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1579135/1666666 [2:26:19<07:01, 207.67it/s]

finished frames 9474600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1579242/1666666 [2:26:19<06:57, 209.30it/s]

finished frames 9475200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1579327/1666666 [2:26:19<06:57, 209.39it/s]

finished frames 9475800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1579432/1666666 [2:26:20<06:57, 208.85it/s]

finished frames 9476400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1579538/1666666 [2:26:20<06:57, 208.76it/s]

finished frames 9477000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1579624/1666666 [2:26:21<06:55, 209.59it/s]

finished frames 9477600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1579733/1666666 [2:26:21<06:52, 210.82it/s]

finished frames 9478200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1579842/1666666 [2:26:22<06:54, 209.56it/s]

finished frames 9478800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1579927/1666666 [2:26:22<06:55, 208.75it/s]

finished frames 9479400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1580032/1666666 [2:26:23<07:05, 203.44it/s]

finished frames 9480000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1580138/1666666 [2:26:23<06:57, 207.49it/s]

finished frames 9480600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1580223/1666666 [2:26:24<06:55, 208.08it/s]

finished frames 9481200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1580332/1666666 [2:26:24<06:52, 209.26it/s]

finished frames 9481800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1580441/1666666 [2:26:25<06:50, 209.88it/s]

finished frames 9482400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1580525/1666666 [2:26:25<06:53, 208.47it/s]

finished frames 9483000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1580631/1666666 [2:26:26<06:52, 208.74it/s]

finished frames 9483600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1580739/1666666 [2:26:26<07:02, 203.55it/s]

finished frames 9484200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1580824/1666666 [2:26:27<06:55, 206.84it/s]

finished frames 9484800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1580930/1666666 [2:26:27<06:51, 208.38it/s]

finished frames 9485400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1581036/1666666 [2:26:28<06:59, 204.02it/s]

finished frames 9486000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1581142/1666666 [2:26:28<06:51, 208.05it/s]

finished frames 9486600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1581228/1666666 [2:26:29<06:48, 209.22it/s]

finished frames 9487200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1581335/1666666 [2:26:29<06:47, 209.27it/s]

finished frames 9487800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1581422/1666666 [2:26:30<06:47, 209.07it/s]

finished frames 9488400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1581532/1666666 [2:26:30<06:43, 211.13it/s]

finished frames 9489000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1581642/1666666 [2:26:31<06:44, 210.05it/s]

finished frames 9489600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1581730/1666666 [2:26:31<06:43, 210.69it/s]

finished frames 9490200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1581840/1666666 [2:26:32<06:43, 210.37it/s]

finished frames 9490800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1581928/1666666 [2:26:32<06:43, 210.24it/s]

finished frames 9491400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1582037/1666666 [2:26:32<06:52, 205.13it/s]

finished frames 9492000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1582121/1666666 [2:26:33<06:47, 207.66it/s]

finished frames 9492600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1582231/1666666 [2:26:33<06:39, 211.13it/s]

finished frames 9493200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1582341/1666666 [2:26:34<06:40, 210.76it/s]

finished frames 9493800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1582429/1666666 [2:26:34<06:39, 210.76it/s]

finished frames 9494400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1582539/1666666 [2:26:35<06:38, 210.98it/s]

finished frames 9495000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1582627/1666666 [2:26:35<06:39, 210.10it/s]

finished frames 9495600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1582734/1666666 [2:26:36<06:45, 207.14it/s]

finished frames 9496200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1582822/1666666 [2:26:36<06:41, 208.71it/s]

finished frames 9496800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1582932/1666666 [2:26:37<06:38, 209.98it/s]

finished frames 9497400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1583020/1666666 [2:26:37<07:24, 188.31it/s]

finished frames 9498000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1583126/1666666 [2:26:38<06:45, 206.04it/s]

finished frames 9498600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▍| 1583232/1666666 [2:26:38<06:39, 208.65it/s]

finished frames 9499200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1583342/1666666 [2:26:39<06:36, 210.37it/s]

finished frames 9499800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1583430/1666666 [2:26:39<06:35, 210.45it/s]

finished frames 9500400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1583540/1666666 [2:26:40<06:33, 211.48it/s]

finished frames 9501000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1583628/1666666 [2:26:40<06:32, 211.66it/s]

finished frames 9501600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1583738/1666666 [2:26:41<06:31, 211.62it/s]

finished frames 9502200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1583826/1666666 [2:26:41<06:32, 211.13it/s]

finished frames 9502800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1583936/1666666 [2:26:42<06:34, 209.74it/s]

finished frames 9503400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1584020/1666666 [2:26:42<06:47, 202.67it/s]

finished frames 9504000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1584129/1666666 [2:26:42<06:34, 209.06it/s]

finished frames 9504600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1584234/1666666 [2:26:43<06:35, 208.61it/s]

finished frames 9505200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1584322/1666666 [2:26:43<06:32, 210.05it/s]

finished frames 9505800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1584432/1666666 [2:26:44<06:25, 213.07it/s]

finished frames 9506400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1584542/1666666 [2:26:44<06:25, 212.77it/s]

finished frames 9507000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1584630/1666666 [2:26:45<06:26, 212.16it/s]

finished frames 9507600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1584740/1666666 [2:26:45<06:24, 213.00it/s]

finished frames 9508200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1584828/1666666 [2:26:46<06:24, 212.67it/s]

finished frames 9508800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1584938/1666666 [2:26:46<06:22, 213.75it/s]

finished frames 9509400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1585025/1666666 [2:26:47<06:32, 207.86it/s]

finished frames 9510000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1585135/1666666 [2:26:47<06:22, 213.11it/s]

finished frames 9510600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1585223/1666666 [2:26:48<06:20, 214.09it/s]

finished frames 9511200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1585333/1666666 [2:26:48<06:35, 205.85it/s]

finished frames 9511800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1585443/1666666 [2:26:49<06:37, 204.45it/s]

finished frames 9512400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1585531/1666666 [2:26:49<06:24, 210.81it/s]

finished frames 9513000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1585641/1666666 [2:26:50<06:20, 212.99it/s]

finished frames 9513600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1585729/1666666 [2:26:50<06:20, 212.93it/s]

finished frames 9514200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1585839/1666666 [2:26:51<06:18, 213.38it/s]

finished frames 9514800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1585927/1666666 [2:26:51<06:19, 212.79it/s]

finished frames 9515400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1586037/1666666 [2:26:51<06:26, 208.59it/s]

finished frames 9516000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1586125/1666666 [2:26:52<06:20, 211.44it/s]

finished frames 9516600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1586235/1666666 [2:26:52<06:18, 212.33it/s]

finished frames 9517200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1586323/1666666 [2:26:53<06:18, 212.30it/s]

finished frames 9517800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1586433/1666666 [2:26:53<06:17, 212.48it/s]

finished frames 9518400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1586543/1666666 [2:26:54<06:17, 212.47it/s]

finished frames 9519000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1586631/1666666 [2:26:54<06:19, 211.00it/s]

finished frames 9519600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1586741/1666666 [2:26:55<06:21, 209.50it/s]

finished frames 9520200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1586826/1666666 [2:26:55<06:21, 209.10it/s]

finished frames 9520800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1586932/1666666 [2:26:56<06:22, 208.43it/s]

finished frames 9521400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1587037/1666666 [2:26:56<06:30, 203.98it/s]

finished frames 9522000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1587121/1666666 [2:26:57<06:24, 206.74it/s]

finished frames 9522600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1587230/1666666 [2:26:57<06:17, 210.23it/s]

finished frames 9523200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1587338/1666666 [2:26:58<06:19, 209.29it/s]

finished frames 9523800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1587422/1666666 [2:26:58<06:20, 208.31it/s]

finished frames 9524400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1587531/1666666 [2:26:59<06:16, 209.99it/s]

finished frames 9525000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1587617/1666666 [2:26:59<06:19, 208.25it/s]

finished frames 9525600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1587723/1666666 [2:27:00<06:27, 203.52it/s]

finished frames 9526200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1587830/1666666 [2:27:00<06:16, 209.26it/s]

finished frames 9526800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1587936/1666666 [2:27:01<06:16, 209.31it/s]

finished frames 9527400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1588020/1666666 [2:27:01<06:30, 201.60it/s]

finished frames 9528000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1588125/1666666 [2:27:01<06:25, 203.99it/s]

finished frames 9528600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1588230/1666666 [2:27:02<06:22, 205.18it/s]

finished frames 9529200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1588335/1666666 [2:27:03<06:20, 205.61it/s]

finished frames 9529800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1588441/1666666 [2:27:03<06:19, 205.91it/s]

finished frames 9530400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1588525/1666666 [2:27:03<06:21, 204.72it/s]

finished frames 9531000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1588630/1666666 [2:27:04<06:19, 205.70it/s]

finished frames 9531600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1588735/1666666 [2:27:04<06:20, 204.69it/s]

finished frames 9532200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1588840/1666666 [2:27:05<06:20, 204.78it/s]

finished frames 9532800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1588924/1666666 [2:27:05<06:20, 204.49it/s]

finished frames 9533400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1589029/1666666 [2:27:06<06:26, 201.13it/s]

finished frames 9534000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1589134/1666666 [2:27:06<06:16, 205.96it/s]

finished frames 9534600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1589239/1666666 [2:27:07<06:14, 206.82it/s]

finished frames 9535200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1589323/1666666 [2:27:07<06:11, 208.04it/s]

finished frames 9535800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1589429/1666666 [2:27:08<06:10, 208.40it/s]

finished frames 9536400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1589535/1666666 [2:27:08<06:09, 208.66it/s]

finished frames 9537000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1589623/1666666 [2:27:09<06:04, 211.50it/s]

finished frames 9537600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1589733/1666666 [2:27:09<06:00, 213.27it/s]

finished frames 9538200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1589843/1666666 [2:27:10<06:02, 212.12it/s]

finished frames 9538800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1589931/1666666 [2:27:10<06:03, 211.20it/s]

finished frames 9539400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1590039/1666666 [2:27:11<06:33, 194.76it/s]

finished frames 9540000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1590126/1666666 [2:27:11<06:24, 198.95it/s]

finished frames 9540600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1590236/1666666 [2:27:12<06:04, 209.70it/s]

finished frames 9541200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1590324/1666666 [2:27:12<06:02, 210.62it/s]

finished frames 9541800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1590434/1666666 [2:27:13<06:00, 211.47it/s]

finished frames 9542400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1590522/1666666 [2:27:13<05:59, 211.58it/s]

finished frames 9543000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1590632/1666666 [2:27:14<05:56, 213.06it/s]

finished frames 9543600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1590742/1666666 [2:27:14<05:57, 212.65it/s]

finished frames 9544200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1590830/1666666 [2:27:15<05:56, 212.81it/s]

finished frames 9544800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1590940/1666666 [2:27:15<05:55, 212.91it/s]

finished frames 9545400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1591027/1666666 [2:27:15<06:04, 207.74it/s]

finished frames 9546000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1591137/1666666 [2:27:16<05:57, 211.51it/s]

finished frames 9546600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1591225/1666666 [2:27:16<05:56, 211.70it/s]

finished frames 9547200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1591335/1666666 [2:27:17<05:54, 212.29it/s]

finished frames 9547800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1591423/1666666 [2:27:17<05:54, 212.09it/s]

finished frames 9548400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1591533/1666666 [2:27:18<05:50, 214.10it/s]

finished frames 9549000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 95%|█████████▌| 1591643/1666666 [2:27:18<05:52, 213.01it/s]

finished frames 9549600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1591731/1666666 [2:27:19<05:52, 212.58it/s]

finished frames 9550200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1591841/1666666 [2:27:19<05:52, 212.49it/s]

finished frames 9550800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1591929/1666666 [2:27:20<05:50, 213.04it/s]

finished frames 9551400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1592039/1666666 [2:27:20<05:56, 209.12it/s]

finished frames 9552000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1592127/1666666 [2:27:21<05:50, 212.47it/s]

finished frames 9552600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1592237/1666666 [2:27:21<05:48, 213.63it/s]

finished frames 9553200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1592325/1666666 [2:27:22<05:50, 212.06it/s]

finished frames 9553800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1592434/1666666 [2:27:22<05:56, 208.47it/s]

finished frames 9554400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1592522/1666666 [2:27:23<05:51, 211.14it/s]

finished frames 9555000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1592632/1666666 [2:27:23<05:46, 213.41it/s]

finished frames 9555600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1592742/1666666 [2:27:24<05:46, 213.21it/s]

finished frames 9556200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1592830/1666666 [2:27:24<05:46, 213.01it/s]

finished frames 9556800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1592940/1666666 [2:27:25<05:47, 212.34it/s]

finished frames 9557400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1593027/1666666 [2:27:25<05:57, 206.17it/s]

finished frames 9558000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1593136/1666666 [2:27:25<05:47, 211.45it/s]

finished frames 9558600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1593224/1666666 [2:27:26<05:45, 212.78it/s]

finished frames 9559200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1593334/1666666 [2:27:26<05:41, 214.88it/s]

finished frames 9559800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1593422/1666666 [2:27:27<05:43, 213.28it/s]

finished frames 9560400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1593532/1666666 [2:27:27<05:41, 214.40it/s]

finished frames 9561000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1593642/1666666 [2:27:28<05:41, 213.89it/s]

finished frames 9561600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1593730/1666666 [2:27:28<05:40, 213.94it/s]

finished frames 9562200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1593840/1666666 [2:27:29<05:41, 213.07it/s]

finished frames 9562800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1593928/1666666 [2:27:29<05:39, 214.35it/s]

finished frames 9563400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1594038/1666666 [2:27:30<05:48, 208.46it/s]

finished frames 9564000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1594126/1666666 [2:27:30<05:44, 210.84it/s]

finished frames 9564600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1594236/1666666 [2:27:31<05:42, 211.22it/s]

finished frames 9565200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1594324/1666666 [2:27:31<05:44, 209.74it/s]

finished frames 9565800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1594434/1666666 [2:27:32<05:43, 210.55it/s]

finished frames 9566400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1594522/1666666 [2:27:32<05:44, 209.20it/s]

finished frames 9567000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1594631/1666666 [2:27:32<05:43, 209.97it/s]

finished frames 9567600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1594719/1666666 [2:27:33<05:41, 210.49it/s]

finished frames 9568200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1594827/1666666 [2:27:33<05:53, 203.12it/s]

finished frames 9568800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1594935/1666666 [2:27:34<05:50, 204.80it/s]

finished frames 9569400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1595019/1666666 [2:27:34<06:11, 192.92it/s]

finished frames 9570000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1595129/1666666 [2:27:35<05:42, 208.70it/s]

finished frames 9570600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1595238/1666666 [2:27:35<05:40, 209.64it/s]

finished frames 9571200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1595322/1666666 [2:27:36<05:43, 207.85it/s]

finished frames 9571800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1595427/1666666 [2:27:36<05:42, 207.70it/s]

finished frames 9572400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1595532/1666666 [2:27:37<05:44, 206.69it/s]

finished frames 9573000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1595638/1666666 [2:27:37<05:42, 207.60it/s]

finished frames 9573600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1595726/1666666 [2:27:38<05:36, 210.77it/s]

finished frames 9574200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1595836/1666666 [2:27:38<05:35, 210.98it/s]

finished frames 9574800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1595924/1666666 [2:27:39<05:35, 211.03it/s]

finished frames 9575400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1596033/1666666 [2:27:39<05:43, 205.46it/s]

finished frames 9576000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1596142/1666666 [2:27:40<05:34, 210.93it/s]

finished frames 9576600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1596230/1666666 [2:27:40<05:33, 211.12it/s]

finished frames 9577200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1596340/1666666 [2:27:41<05:31, 211.96it/s]

finished frames 9577800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1596428/1666666 [2:27:41<05:32, 211.47it/s]

finished frames 9578400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1596538/1666666 [2:27:42<05:29, 212.55it/s]

finished frames 9579000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1596626/1666666 [2:27:42<05:30, 212.13it/s]

finished frames 9579600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1596733/1666666 [2:27:43<05:37, 207.16it/s]

finished frames 9580200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1596838/1666666 [2:27:43<05:38, 206.39it/s]

finished frames 9580800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1596922/1666666 [2:27:44<05:37, 206.73it/s]

finished frames 9581400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1597027/1666666 [2:27:44<05:47, 200.44it/s]

finished frames 9582000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1597132/1666666 [2:27:45<05:47, 199.91it/s]

finished frames 9582600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1597237/1666666 [2:27:45<05:39, 204.75it/s]

finished frames 9583200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1597343/1666666 [2:27:46<05:33, 207.63it/s]

finished frames 9583800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1597431/1666666 [2:27:46<05:29, 210.34it/s]

finished frames 9584400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1597541/1666666 [2:27:47<05:26, 211.86it/s]

finished frames 9585000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1597629/1666666 [2:27:47<05:27, 211.00it/s]

finished frames 9585600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1597739/1666666 [2:27:47<05:27, 210.52it/s]

finished frames 9586200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1597825/1666666 [2:27:48<05:29, 209.18it/s]

finished frames 9586800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1597930/1666666 [2:27:48<05:29, 208.40it/s]

finished frames 9587400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1598035/1666666 [2:27:49<05:37, 203.47it/s]

finished frames 9588000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1598140/1666666 [2:27:49<05:30, 207.18it/s]

finished frames 9588600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1598224/1666666 [2:27:50<05:33, 205.53it/s]

finished frames 9589200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1598329/1666666 [2:27:50<05:33, 204.77it/s]

finished frames 9589800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1598434/1666666 [2:27:51<05:31, 205.57it/s]

finished frames 9590400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1598539/1666666 [2:27:51<05:30, 206.25it/s]

finished frames 9591000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1598623/1666666 [2:27:52<05:30, 205.61it/s]

finished frames 9591600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1598728/1666666 [2:27:52<05:28, 206.91it/s]

finished frames 9592200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1598833/1666666 [2:27:53<05:30, 205.53it/s]

finished frames 9592800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1598938/1666666 [2:27:53<05:34, 202.37it/s]

finished frames 9593400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1599023/1666666 [2:27:54<05:32, 203.55it/s]

finished frames 9594000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1599133/1666666 [2:27:54<05:20, 210.49it/s]

finished frames 9594600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1599221/1666666 [2:27:55<05:17, 212.64it/s]

finished frames 9595200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1599330/1666666 [2:27:55<05:21, 209.58it/s]

finished frames 9595800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1599440/1666666 [2:27:56<05:14, 213.79it/s]

finished frames 9596400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1599528/1666666 [2:27:56<05:08, 217.52it/s]

finished frames 9597000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1599638/1666666 [2:27:57<05:06, 218.76it/s]

finished frames 9597600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1599726/1666666 [2:27:57<05:05, 219.39it/s]

finished frames 9598200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1599836/1666666 [2:27:57<05:05, 218.60it/s]

finished frames 9598800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1599924/1666666 [2:27:58<05:06, 218.08it/s]

finished frames 9599400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1600034/1666666 [2:27:58<05:13, 212.88it/s]

finished frames 9600000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1600144/1666666 [2:27:59<05:09, 215.08it/s]

finished frames 9600600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1600233/1666666 [2:27:59<05:10, 214.21it/s]

finished frames 9601200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1600343/1666666 [2:28:00<05:13, 211.51it/s]

finished frames 9601800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1600432/1666666 [2:28:00<05:05, 217.13it/s]

finished frames 9602400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1600523/1666666 [2:28:01<05:01, 219.40it/s]

finished frames 9603000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1600638/1666666 [2:28:01<04:58, 221.46it/s]

finished frames 9603600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1600730/1666666 [2:28:02<05:00, 219.52it/s]

finished frames 9604200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1600840/1666666 [2:28:02<05:02, 217.68it/s]

finished frames 9604800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1600928/1666666 [2:28:03<05:03, 216.29it/s]

finished frames 9605400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1601038/1666666 [2:28:03<05:13, 209.59it/s]

finished frames 9606000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1601125/1666666 [2:28:03<05:10, 210.87it/s]

finished frames 9606600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1601235/1666666 [2:28:04<05:10, 211.01it/s]

finished frames 9607200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1601323/1666666 [2:28:04<05:08, 211.95it/s]

finished frames 9607800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1601433/1666666 [2:28:05<05:08, 211.15it/s]

finished frames 9608400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1601543/1666666 [2:28:05<05:08, 211.19it/s]

finished frames 9609000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1601631/1666666 [2:28:06<05:08, 211.08it/s]

finished frames 9609600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1601741/1666666 [2:28:06<05:06, 211.51it/s]

finished frames 9610200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1601829/1666666 [2:28:07<05:07, 211.02it/s]

finished frames 9610800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1601939/1666666 [2:28:07<05:11, 207.80it/s]

finished frames 9611400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1602026/1666666 [2:28:08<05:15, 204.79it/s]

finished frames 9612000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1602131/1666666 [2:28:08<05:10, 207.71it/s]

finished frames 9612600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1602238/1666666 [2:28:09<05:08, 209.08it/s]

finished frames 9613200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1602324/1666666 [2:28:09<05:07, 208.97it/s]

finished frames 9613800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1602429/1666666 [2:28:10<05:07, 209.14it/s]

finished frames 9614400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1602537/1666666 [2:28:10<05:06, 209.57it/s]

finished frames 9615000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1602624/1666666 [2:28:11<05:05, 209.89it/s]

finished frames 9615600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1602732/1666666 [2:28:11<05:05, 209.41it/s]

finished frames 9616200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1602841/1666666 [2:28:12<05:04, 209.95it/s]

finished frames 9616800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1602926/1666666 [2:28:12<05:04, 209.27it/s]

finished frames 9617400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1603031/1666666 [2:28:13<05:12, 203.62it/s]

finished frames 9618000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1603139/1666666 [2:28:13<05:04, 208.59it/s]

finished frames 9618600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1603224/1666666 [2:28:13<05:02, 209.52it/s]

finished frames 9619200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1603333/1666666 [2:28:14<05:00, 210.49it/s]

finished frames 9619800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1603442/1666666 [2:28:15<05:01, 209.63it/s]

finished frames 9620400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1603526/1666666 [2:28:15<05:02, 208.77it/s]

finished frames 9621000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1603632/1666666 [2:28:15<05:00, 209.64it/s]

finished frames 9621600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1603741/1666666 [2:28:16<05:00, 209.48it/s]

finished frames 9622200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1603825/1666666 [2:28:16<05:00, 208.86it/s]

finished frames 9622800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1603930/1666666 [2:28:17<05:01, 208.40it/s]

finished frames 9623400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1604037/1666666 [2:28:17<05:04, 205.94it/s]

finished frames 9624000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▌| 1604125/1666666 [2:28:18<04:56, 210.93it/s]

finished frames 9624600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▋| 1604235/1666666 [2:28:18<05:04, 204.73it/s]

finished frames 9625200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▋| 1604323/1666666 [2:28:19<05:20, 194.29it/s]

finished frames 9625800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▋| 1604432/1666666 [2:28:19<04:55, 210.26it/s]

finished frames 9626400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▋| 1604542/1666666 [2:28:20<04:50, 213.55it/s]

finished frames 9627000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▋| 1604630/1666666 [2:28:20<04:50, 213.33it/s]

finished frames 9627600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▋| 1604740/1666666 [2:28:21<04:49, 213.85it/s]

finished frames 9628200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▋| 1604828/1666666 [2:28:21<04:50, 212.81it/s]

finished frames 9628800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▋| 1604938/1666666 [2:28:22<04:48, 213.90it/s]

finished frames 9629400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▋| 1605025/1666666 [2:28:22<04:56, 207.77it/s]

finished frames 9630000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▋| 1605135/1666666 [2:28:23<04:50, 211.89it/s]

finished frames 9630600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▋| 1605223/1666666 [2:28:23<04:49, 212.31it/s]

finished frames 9631200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▋| 1605333/1666666 [2:28:24<04:47, 213.45it/s]

finished frames 9631800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▋| 1605443/1666666 [2:28:24<04:48, 212.52it/s]

finished frames 9632400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▋| 1605531/1666666 [2:28:24<04:49, 211.45it/s]

finished frames 9633000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▋| 1605641/1666666 [2:28:25<04:47, 212.17it/s]

finished frames 9633600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▋| 1605729/1666666 [2:28:25<04:46, 212.83it/s]

finished frames 9634200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▋| 1605839/1666666 [2:28:26<04:44, 213.88it/s]

finished frames 9634800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▋| 1605927/1666666 [2:28:26<04:44, 213.31it/s]

finished frames 9635400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▋| 1606038/1666666 [2:28:27<04:44, 213.07it/s]

finished frames 9636000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▋| 1606126/1666666 [2:28:27<04:41, 215.02it/s]

finished frames 9636600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▋| 1606236/1666666 [2:28:28<04:43, 212.80it/s]

finished frames 9637200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▋| 1606324/1666666 [2:28:28<04:43, 212.89it/s]

finished frames 9637800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▋| 1606434/1666666 [2:28:29<04:41, 213.66it/s]

finished frames 9638400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▋| 1606544/1666666 [2:28:29<04:40, 214.14it/s]

finished frames 9639000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▋| 1606631/1666666 [2:28:30<04:53, 204.81it/s]

finished frames 9639600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▋| 1606719/1666666 [2:28:30<05:08, 194.50it/s]

finished frames 9640200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▋| 1606829/1666666 [2:28:31<04:47, 208.24it/s]

finished frames 9640800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▋| 1606939/1666666 [2:28:31<04:44, 210.21it/s]

finished frames 9641400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▋| 1607026/1666666 [2:28:32<04:50, 205.60it/s]

finished frames 9642000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▋| 1607135/1666666 [2:28:32<04:42, 210.92it/s]

finished frames 9642600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▋| 1607223/1666666 [2:28:32<04:40, 211.79it/s]

finished frames 9643200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▋| 1607333/1666666 [2:28:33<04:38, 213.34it/s]

finished frames 9643800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▋| 1607443/1666666 [2:28:33<04:38, 212.54it/s]

finished frames 9644400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▋| 1607531/1666666 [2:28:34<04:39, 211.61it/s]

finished frames 9645000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▋| 1607641/1666666 [2:28:34<04:39, 211.01it/s]

finished frames 9645600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▋| 1607729/1666666 [2:28:35<04:39, 211.21it/s]

finished frames 9646200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▋| 1607839/1666666 [2:28:35<04:37, 211.69it/s]

finished frames 9646800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▋| 1607927/1666666 [2:28:36<04:37, 211.59it/s]

finished frames 9647400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▋| 1608036/1666666 [2:28:36<04:47, 204.02it/s]

finished frames 9648000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▋| 1608141/1666666 [2:28:37<04:43, 206.54it/s]

finished frames 9648600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 96%|█████████▋| 1608226/1666666 [2:28:37<04:38, 209.49it/s]

finished frames 9649200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1608336/1666666 [2:28:38<04:36, 211.30it/s]

finished frames 9649800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1608424/1666666 [2:28:38<04:35, 211.24it/s]

finished frames 9650400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1608534/1666666 [2:28:39<04:35, 211.28it/s]

finished frames 9651000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1608622/1666666 [2:28:39<04:34, 211.47it/s]

finished frames 9651600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1608732/1666666 [2:28:40<04:32, 212.47it/s]

finished frames 9652200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1608842/1666666 [2:28:40<04:32, 212.11it/s]

finished frames 9652800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1608930/1666666 [2:28:41<04:32, 211.62it/s]

finished frames 9653400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1609039/1666666 [2:28:41<04:44, 202.58it/s]

finished frames 9654000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1609126/1666666 [2:28:42<04:35, 208.85it/s]

finished frames 9654600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1609236/1666666 [2:28:42<04:30, 212.43it/s]

finished frames 9655200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1609324/1666666 [2:28:42<04:30, 212.31it/s]

finished frames 9655800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1609434/1666666 [2:28:43<04:28, 212.91it/s]

finished frames 9656400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1609522/1666666 [2:28:43<04:28, 212.75it/s]

finished frames 9657000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1609632/1666666 [2:28:44<04:26, 214.29it/s]

finished frames 9657600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1609742/1666666 [2:28:44<04:26, 213.82it/s]

finished frames 9658200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1609830/1666666 [2:28:45<04:25, 214.11it/s]

finished frames 9658800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1609940/1666666 [2:28:45<04:25, 213.31it/s]

finished frames 9659400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1610027/1666666 [2:28:46<04:32, 207.49it/s]

finished frames 9660000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1610135/1666666 [2:28:46<04:30, 208.69it/s]

finished frames 9660600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1610241/1666666 [2:28:47<04:30, 208.98it/s]

finished frames 9661200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1610329/1666666 [2:28:47<04:26, 211.52it/s]

finished frames 9661800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1610439/1666666 [2:28:48<04:24, 212.61it/s]

finished frames 9662400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1610527/1666666 [2:28:48<04:23, 212.92it/s]

finished frames 9663000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1610637/1666666 [2:28:49<04:24, 212.03it/s]

finished frames 9663600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1610725/1666666 [2:28:49<04:27, 209.48it/s]

finished frames 9664200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1610831/1666666 [2:28:50<04:27, 208.98it/s]

finished frames 9664800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1610938/1666666 [2:28:50<04:26, 209.14it/s]

finished frames 9665400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1611023/1666666 [2:28:50<04:31, 205.11it/s]

finished frames 9666000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1611128/1666666 [2:28:51<04:27, 207.68it/s]

finished frames 9666600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1611234/1666666 [2:28:52<04:25, 208.42it/s]

finished frames 9667200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1611318/1666666 [2:28:52<04:27, 206.63it/s]

finished frames 9667800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1611423/1666666 [2:28:52<04:34, 201.42it/s]

finished frames 9668400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1611528/1666666 [2:28:53<04:29, 204.61it/s]

finished frames 9669000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1611633/1666666 [2:28:53<04:28, 204.90it/s]

finished frames 9669600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1611738/1666666 [2:28:54<04:26, 205.84it/s]

finished frames 9670200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1611822/1666666 [2:28:54<04:26, 206.02it/s]

finished frames 9670800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1611927/1666666 [2:28:55<04:27, 204.38it/s]

finished frames 9671400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1612032/1666666 [2:28:55<04:31, 200.97it/s]

finished frames 9672000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1612137/1666666 [2:28:56<04:25, 205.14it/s]

finished frames 9672600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1612242/1666666 [2:28:56<04:23, 206.53it/s]

finished frames 9673200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1612326/1666666 [2:28:57<04:23, 206.11it/s]

finished frames 9673800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1612431/1666666 [2:28:57<04:23, 205.58it/s]

finished frames 9674400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1612536/1666666 [2:28:58<04:23, 205.72it/s]

finished frames 9675000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1612641/1666666 [2:28:58<04:21, 206.47it/s]

finished frames 9675600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1612725/1666666 [2:28:59<04:21, 205.98it/s]

finished frames 9676200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1612830/1666666 [2:28:59<04:21, 205.87it/s]

finished frames 9676800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1612935/1666666 [2:29:00<04:21, 205.62it/s]

finished frames 9677400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1613019/1666666 [2:29:00<04:36, 194.00it/s]

finished frames 9678000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1613126/1666666 [2:29:01<04:18, 207.13it/s]

finished frames 9678600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1613234/1666666 [2:29:01<04:15, 208.88it/s]

finished frames 9679200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1613339/1666666 [2:29:02<04:16, 208.12it/s]

finished frames 9679800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1613423/1666666 [2:29:02<04:16, 207.88it/s]

finished frames 9680400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1613532/1666666 [2:29:03<04:12, 210.31it/s]

finished frames 9681000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1613617/1666666 [2:29:03<04:13, 209.26it/s]

finished frames 9681600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1613722/1666666 [2:29:04<04:19, 204.18it/s]

finished frames 9682200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1613831/1666666 [2:29:04<04:10, 211.11it/s]

finished frames 9682800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1613941/1666666 [2:29:05<04:11, 209.57it/s]

finished frames 9683400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1614025/1666666 [2:29:05<04:17, 204.44it/s]

finished frames 9684000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1614133/1666666 [2:29:06<04:10, 209.43it/s]

finished frames 9684600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1614240/1666666 [2:29:06<04:10, 209.22it/s]

finished frames 9685200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1614325/1666666 [2:29:07<04:10, 209.23it/s]

finished frames 9685800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1614430/1666666 [2:29:07<04:11, 207.91it/s]

finished frames 9686400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1614535/1666666 [2:29:08<04:11, 207.37it/s]

finished frames 9687000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1614640/1666666 [2:29:08<04:10, 207.81it/s]

finished frames 9687600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1614724/1666666 [2:29:08<04:09, 208.34it/s]

finished frames 9688200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1614829/1666666 [2:29:09<04:09, 208.10it/s]

finished frames 9688800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1614936/1666666 [2:29:09<04:08, 208.02it/s]

finished frames 9689400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1615023/1666666 [2:29:10<04:10, 206.57it/s]

finished frames 9690000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1615133/1666666 [2:29:10<04:04, 211.12it/s]

finished frames 9690600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1615243/1666666 [2:29:11<04:01, 213.15it/s]

finished frames 9691200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1615331/1666666 [2:29:11<03:59, 214.14it/s]

finished frames 9691800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1615441/1666666 [2:29:12<03:59, 213.93it/s]

finished frames 9692400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1615529/1666666 [2:29:12<04:00, 213.05it/s]

finished frames 9693000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1615639/1666666 [2:29:13<03:59, 213.19it/s]

finished frames 9693600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1615727/1666666 [2:29:13<03:59, 212.74it/s]

finished frames 9694200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1615837/1666666 [2:29:14<03:59, 212.24it/s]

finished frames 9694800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1615925/1666666 [2:29:14<04:00, 211.19it/s]

finished frames 9695400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1616034/1666666 [2:29:15<04:22, 192.79it/s]

finished frames 9696000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1616120/1666666 [2:29:15<04:31, 186.30it/s]

finished frames 9696600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1616230/1666666 [2:29:16<04:03, 207.29it/s]

finished frames 9697200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1616340/1666666 [2:29:16<03:58, 211.07it/s]

finished frames 9697800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1616428/1666666 [2:29:17<03:57, 211.16it/s]

finished frames 9698400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1616538/1666666 [2:29:17<03:57, 211.08it/s]

finished frames 9699000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1616626/1666666 [2:29:18<03:57, 211.01it/s]

finished frames 9699600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1616736/1666666 [2:29:18<03:56, 210.97it/s]

finished frames 9700200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1616824/1666666 [2:29:18<03:55, 211.27it/s]

finished frames 9700800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1616934/1666666 [2:29:19<03:55, 211.04it/s]

finished frames 9701400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1617022/1666666 [2:29:19<04:02, 204.82it/s]

finished frames 9702000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1617132/1666666 [2:29:20<03:55, 210.50it/s]

finished frames 9702600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1617242/1666666 [2:29:20<03:53, 212.00it/s]

finished frames 9703200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1617330/1666666 [2:29:21<03:53, 211.51it/s]

finished frames 9703800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1617440/1666666 [2:29:21<03:52, 212.08it/s]

finished frames 9704400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1617528/1666666 [2:29:22<03:51, 212.34it/s]

finished frames 9705000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1617638/1666666 [2:29:22<03:51, 211.67it/s]

finished frames 9705600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1617726/1666666 [2:29:23<03:51, 211.49it/s]

finished frames 9706200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1617836/1666666 [2:29:23<03:50, 211.77it/s]

finished frames 9706800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1617924/1666666 [2:29:24<03:50, 211.76it/s]

finished frames 9707400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1618033/1666666 [2:29:24<03:55, 206.49it/s]

finished frames 9708000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1618138/1666666 [2:29:25<03:54, 206.55it/s]

finished frames 9708600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1618222/1666666 [2:29:25<03:54, 206.35it/s]

finished frames 9709200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1618328/1666666 [2:29:26<03:52, 207.82it/s]

finished frames 9709800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1618433/1666666 [2:29:26<04:00, 200.92it/s]

finished frames 9710400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1618540/1666666 [2:29:27<03:52, 206.95it/s]

finished frames 9711000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1618624/1666666 [2:29:27<03:53, 206.07it/s]

finished frames 9711600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1618729/1666666 [2:29:28<03:51, 206.66it/s]

finished frames 9712200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1618834/1666666 [2:29:28<03:50, 207.19it/s]

finished frames 9712800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1618941/1666666 [2:29:29<03:48, 208.72it/s]

finished frames 9713400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1619024/1666666 [2:29:29<04:08, 192.10it/s]

finished frames 9714000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1619129/1666666 [2:29:30<03:54, 202.78it/s]

finished frames 9714600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1619234/1666666 [2:29:30<03:51, 205.30it/s]

finished frames 9715200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1619339/1666666 [2:29:31<03:49, 205.98it/s]

finished frames 9715800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1619423/1666666 [2:29:31<03:49, 205.89it/s]

finished frames 9716400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1619528/1666666 [2:29:31<03:48, 206.34it/s]

finished frames 9717000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1619633/1666666 [2:29:32<03:47, 207.13it/s]

finished frames 9717600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1619738/1666666 [2:29:32<03:47, 206.38it/s]

finished frames 9718200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1619822/1666666 [2:29:33<03:47, 205.95it/s]

finished frames 9718800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1619927/1666666 [2:29:33<03:47, 205.87it/s]

finished frames 9719400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1620032/1666666 [2:29:34<03:51, 201.11it/s]

finished frames 9720000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1620137/1666666 [2:29:34<03:46, 205.57it/s]

finished frames 9720600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1620242/1666666 [2:29:35<03:44, 206.42it/s]

finished frames 9721200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1620326/1666666 [2:29:35<03:45, 205.92it/s]

finished frames 9721800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1620431/1666666 [2:29:36<03:44, 205.74it/s]

finished frames 9722400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1620536/1666666 [2:29:36<03:43, 206.27it/s]

finished frames 9723000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1620641/1666666 [2:29:37<03:42, 206.88it/s]

finished frames 9723600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1620725/1666666 [2:29:37<03:51, 198.15it/s]

finished frames 9724200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1620830/1666666 [2:29:38<03:56, 193.53it/s]

finished frames 9724800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1620935/1666666 [2:29:38<03:43, 204.42it/s]

finished frames 9725400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1621040/1666666 [2:29:39<03:45, 202.76it/s]

finished frames 9726000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1621124/1666666 [2:29:39<03:41, 205.65it/s]

finished frames 9726600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1621229/1666666 [2:29:40<03:40, 206.18it/s]

finished frames 9727200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1621336/1666666 [2:29:40<03:36, 208.96it/s]

finished frames 9727800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1621422/1666666 [2:29:41<03:36, 209.40it/s]

finished frames 9728400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1621527/1666666 [2:29:41<03:39, 205.88it/s]

finished frames 9729000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1621632/1666666 [2:29:42<03:38, 206.48it/s]

finished frames 9729600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1621737/1666666 [2:29:42<03:37, 206.76it/s]

finished frames 9730200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1621842/1666666 [2:29:43<03:36, 206.82it/s]

finished frames 9730800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1621926/1666666 [2:29:43<03:36, 206.78it/s]

finished frames 9731400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1622031/1666666 [2:29:44<03:40, 202.40it/s]

finished frames 9732000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1622136/1666666 [2:29:44<03:35, 206.64it/s]

finished frames 9732600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1622241/1666666 [2:29:45<03:34, 206.79it/s]

finished frames 9733200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1622325/1666666 [2:29:45<03:34, 206.84it/s]

finished frames 9733800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1622430/1666666 [2:29:46<03:35, 205.75it/s]

finished frames 9734400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1622535/1666666 [2:29:46<03:33, 206.70it/s]

finished frames 9735000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1622640/1666666 [2:29:47<03:33, 206.30it/s]

finished frames 9735600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1622724/1666666 [2:29:47<03:36, 203.03it/s]

finished frames 9736200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1622830/1666666 [2:29:48<03:31, 207.42it/s]

finished frames 9736800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1622935/1666666 [2:29:48<03:30, 207.79it/s]

finished frames 9737400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1623020/1666666 [2:29:48<03:50, 189.58it/s]

finished frames 9738000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1623125/1666666 [2:29:49<04:11, 173.25it/s]

finished frames 9738600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1623230/1666666 [2:29:50<03:36, 200.95it/s]

finished frames 9739200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1623335/1666666 [2:29:50<03:30, 206.05it/s]

finished frames 9739800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1623441/1666666 [2:29:51<03:28, 207.14it/s]

finished frames 9740400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1623525/1666666 [2:29:51<03:27, 207.86it/s]

finished frames 9741000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1623632/1666666 [2:29:51<03:25, 209.30it/s]

finished frames 9741600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1623737/1666666 [2:29:52<03:26, 208.17it/s]

finished frames 9742200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1623842/1666666 [2:29:52<03:25, 208.15it/s]

finished frames 9742800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1623926/1666666 [2:29:53<03:25, 207.73it/s]

finished frames 9743400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1624031/1666666 [2:29:53<03:33, 200.09it/s]

finished frames 9744000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1624140/1666666 [2:29:54<03:22, 210.41it/s]

finished frames 9744600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1624226/1666666 [2:29:54<03:22, 209.52it/s]

finished frames 9745200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1624334/1666666 [2:29:55<03:24, 206.63it/s]

finished frames 9745800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1624439/1666666 [2:29:55<03:23, 207.20it/s]

finished frames 9746400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1624527/1666666 [2:29:56<03:19, 211.15it/s]

finished frames 9747000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1624637/1666666 [2:29:56<03:18, 212.02it/s]

finished frames 9747600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1624725/1666666 [2:29:57<03:17, 212.18it/s]

finished frames 9748200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1624835/1666666 [2:29:57<03:15, 213.45it/s]

finished frames 9748800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 97%|█████████▋| 1624923/1666666 [2:29:58<03:16, 212.11it/s]

finished frames 9749400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1625033/1666666 [2:29:58<03:20, 207.98it/s]

finished frames 9750000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1625143/1666666 [2:29:59<03:13, 215.09it/s]

finished frames 9750600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1625231/1666666 [2:29:59<03:14, 213.47it/s]

finished frames 9751200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1625341/1666666 [2:30:00<03:15, 211.39it/s]

finished frames 9751800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1625427/1666666 [2:30:00<03:22, 203.60it/s]

finished frames 9752400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1625537/1666666 [2:30:01<03:11, 214.46it/s]

finished frames 9753000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1625625/1666666 [2:30:01<03:09, 216.89it/s]

finished frames 9753600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1625735/1666666 [2:30:01<03:08, 217.30it/s]

finished frames 9754200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1625823/1666666 [2:30:02<03:16, 207.76it/s]

finished frames 9754800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1625928/1666666 [2:30:02<03:18, 205.63it/s]

finished frames 9755400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1626033/1666666 [2:30:03<03:25, 197.88it/s]

finished frames 9756000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1626141/1666666 [2:30:03<03:14, 208.86it/s]

finished frames 9756600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1626228/1666666 [2:30:04<03:11, 211.48it/s]

finished frames 9757200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1626338/1666666 [2:30:04<03:09, 212.40it/s]

finished frames 9757800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1626426/1666666 [2:30:05<03:08, 213.44it/s]

finished frames 9758400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1626536/1666666 [2:30:05<03:09, 212.16it/s]

finished frames 9759000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1626624/1666666 [2:30:06<03:08, 212.70it/s]

finished frames 9759600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1626734/1666666 [2:30:06<03:08, 211.63it/s]

finished frames 9760200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1626822/1666666 [2:30:07<03:08, 211.41it/s]

finished frames 9760800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1626932/1666666 [2:30:07<03:05, 214.18it/s]

finished frames 9761400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1627020/1666666 [2:30:08<03:12, 206.35it/s]

finished frames 9762000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1627130/1666666 [2:30:08<03:06, 211.95it/s]

finished frames 9762600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1627240/1666666 [2:30:09<03:05, 212.14it/s]

finished frames 9763200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1627328/1666666 [2:30:09<03:05, 212.03it/s]

finished frames 9763800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1627438/1666666 [2:30:10<03:04, 212.05it/s]

finished frames 9764400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1627526/1666666 [2:30:10<03:04, 211.63it/s]

finished frames 9765000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1627636/1666666 [2:30:11<03:04, 211.33it/s]

finished frames 9765600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1627724/1666666 [2:30:11<03:03, 212.53it/s]

finished frames 9766200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1627834/1666666 [2:30:11<03:07, 207.64it/s]

finished frames 9766800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1627940/1666666 [2:30:12<03:06, 207.19it/s]

finished frames 9767400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1628024/1666666 [2:30:12<03:11, 201.46it/s]

finished frames 9768000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1628129/1666666 [2:30:13<03:07, 205.86it/s]

finished frames 9768600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1628235/1666666 [2:30:13<03:05, 207.09it/s]

finished frames 9769200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1628340/1666666 [2:30:14<03:04, 207.95it/s]

finished frames 9769800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1628424/1666666 [2:30:14<03:03, 208.47it/s]

finished frames 9770400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1628529/1666666 [2:30:15<03:03, 207.88it/s]

finished frames 9771000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1628634/1666666 [2:30:15<03:02, 208.02it/s]

finished frames 9771600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1628739/1666666 [2:30:16<03:02, 207.57it/s]

finished frames 9772200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1628823/1666666 [2:30:16<03:02, 207.70it/s]

finished frames 9772800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1628928/1666666 [2:30:17<03:02, 207.35it/s]

finished frames 9773400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1629033/1666666 [2:30:17<03:05, 203.42it/s]

finished frames 9774000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1629138/1666666 [2:30:18<03:01, 206.66it/s]

finished frames 9774600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1629222/1666666 [2:30:18<02:59, 208.21it/s]

finished frames 9775200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1629328/1666666 [2:30:19<02:59, 208.01it/s]

finished frames 9775800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1629433/1666666 [2:30:19<02:58, 208.28it/s]

finished frames 9776400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1629538/1666666 [2:30:20<02:58, 207.97it/s]

finished frames 9777000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1629622/1666666 [2:30:20<02:58, 207.39it/s]

finished frames 9777600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1629727/1666666 [2:30:21<02:57, 208.24it/s]

finished frames 9778200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1629832/1666666 [2:30:21<02:58, 206.70it/s]

finished frames 9778800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1629937/1666666 [2:30:22<02:57, 207.06it/s]

finished frames 9779400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1630021/1666666 [2:30:22<03:03, 199.61it/s]

finished frames 9780000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1630129/1666666 [2:30:23<02:55, 208.43it/s]

finished frames 9780600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1630239/1666666 [2:30:23<02:52, 210.94it/s]

finished frames 9781200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1630327/1666666 [2:30:23<02:51, 211.60it/s]

finished frames 9781800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1630436/1666666 [2:30:24<02:53, 208.51it/s]

finished frames 9782400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1630523/1666666 [2:30:24<02:51, 210.35it/s]

finished frames 9783000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1630631/1666666 [2:30:25<02:51, 209.92it/s]

finished frames 9783600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1630736/1666666 [2:30:25<02:52, 208.63it/s]

finished frames 9784200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1630841/1666666 [2:30:26<02:52, 207.99it/s]

finished frames 9784800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1630925/1666666 [2:30:26<02:51, 208.60it/s]

finished frames 9785400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1631030/1666666 [2:30:27<02:55, 202.90it/s]

finished frames 9786000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1631139/1666666 [2:30:27<02:48, 210.80it/s]

finished frames 9786600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1631227/1666666 [2:30:28<02:46, 212.47it/s]

finished frames 9787200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1631337/1666666 [2:30:28<02:45, 213.23it/s]

finished frames 9787800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1631425/1666666 [2:30:29<02:45, 212.87it/s]

finished frames 9788400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1631535/1666666 [2:30:29<02:45, 212.41it/s]

finished frames 9789000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1631623/1666666 [2:30:30<02:45, 212.32it/s]

finished frames 9789600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1631733/1666666 [2:30:30<02:43, 213.46it/s]

finished frames 9790200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1631843/1666666 [2:30:31<02:43, 213.09it/s]

finished frames 9790800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1631931/1666666 [2:30:31<02:43, 212.60it/s]

finished frames 9791400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1632040/1666666 [2:30:32<02:47, 207.12it/s]

finished frames 9792000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1632128/1666666 [2:30:32<02:44, 210.09it/s]

finished frames 9792600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1632238/1666666 [2:30:33<02:42, 212.04it/s]

finished frames 9793200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1632326/1666666 [2:30:33<02:41, 212.40it/s]

finished frames 9793800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1632414/1666666 [2:30:33<02:40, 212.75it/s]

finished frames 9794400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1632523/1666666 [2:30:34<02:49, 201.58it/s]

finished frames 9795000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1632632/1666666 [2:30:34<02:45, 205.25it/s]

finished frames 9795600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1632742/1666666 [2:30:35<02:41, 210.57it/s]

finished frames 9796200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1632830/1666666 [2:30:35<02:40, 210.81it/s]

finished frames 9796800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1632940/1666666 [2:30:36<02:40, 210.61it/s]

finished frames 9797400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1633027/1666666 [2:30:36<02:42, 206.77it/s]

finished frames 9798000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1633137/1666666 [2:30:37<02:39, 210.24it/s]

finished frames 9798600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1633225/1666666 [2:30:37<02:37, 211.78it/s]

finished frames 9799200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1633331/1666666 [2:30:38<02:40, 208.10it/s]

finished frames 9799800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1633436/1666666 [2:30:38<02:40, 207.58it/s]

finished frames 9800400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1633541/1666666 [2:30:39<02:39, 207.47it/s]

finished frames 9801000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1633625/1666666 [2:30:39<02:38, 208.26it/s]

finished frames 9801600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1633734/1666666 [2:30:40<02:35, 212.04it/s]

finished frames 9802200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1633822/1666666 [2:30:40<02:34, 212.43it/s]

finished frames 9802800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1633932/1666666 [2:30:41<02:33, 212.78it/s]

finished frames 9803400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1634041/1666666 [2:30:41<02:37, 207.20it/s]

finished frames 9804000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1634129/1666666 [2:30:42<02:34, 210.52it/s]

finished frames 9804600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1634239/1666666 [2:30:42<02:32, 211.99it/s]

finished frames 9805200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1634327/1666666 [2:30:43<02:32, 212.26it/s]

finished frames 9805800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1634437/1666666 [2:30:43<02:32, 211.72it/s]

finished frames 9806400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1634525/1666666 [2:30:43<02:30, 213.00it/s]

finished frames 9807000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1634635/1666666 [2:30:44<02:30, 213.04it/s]

finished frames 9807600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1634723/1666666 [2:30:44<02:30, 212.60it/s]

finished frames 9808200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1634833/1666666 [2:30:45<02:38, 200.98it/s]

finished frames 9808800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1634920/1666666 [2:30:45<02:45, 191.84it/s]

finished frames 9809400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1635026/1666666 [2:30:46<02:37, 200.64it/s]

finished frames 9810000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1635135/1666666 [2:30:46<02:30, 208.87it/s]

finished frames 9810600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1635242/1666666 [2:30:47<02:30, 208.74it/s]

finished frames 9811200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1635329/1666666 [2:30:47<02:29, 209.96it/s]

finished frames 9811800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1635435/1666666 [2:30:48<02:29, 208.74it/s]

finished frames 9812400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1635523/1666666 [2:30:48<02:28, 209.18it/s]

finished frames 9813000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1635633/1666666 [2:30:49<02:26, 211.44it/s]

finished frames 9813600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1635743/1666666 [2:30:49<02:26, 210.43it/s]

finished frames 9814200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1635830/1666666 [2:30:50<02:27, 208.78it/s]

finished frames 9814800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1635938/1666666 [2:30:50<02:26, 209.91it/s]

finished frames 9815400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1636024/1666666 [2:30:51<02:30, 204.16it/s]

finished frames 9816000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1636130/1666666 [2:30:51<02:27, 207.14it/s]

finished frames 9816600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1636236/1666666 [2:30:52<02:25, 208.59it/s]

finished frames 9817200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1636323/1666666 [2:30:52<02:24, 210.07it/s]

finished frames 9817800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1636433/1666666 [2:30:53<02:24, 209.84it/s]

finished frames 9818400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1636543/1666666 [2:30:53<02:23, 210.61it/s]

finished frames 9819000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1636631/1666666 [2:30:54<02:23, 209.63it/s]

finished frames 9819600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1636740/1666666 [2:30:54<02:22, 210.18it/s]

finished frames 9820200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1636828/1666666 [2:30:55<02:21, 210.55it/s]

finished frames 9820800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1636938/1666666 [2:30:55<02:21, 210.11it/s]

finished frames 9821400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1637025/1666666 [2:30:55<02:24, 204.69it/s]

finished frames 9822000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1637132/1666666 [2:30:56<02:21, 208.34it/s]

finished frames 9822600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1637238/1666666 [2:30:57<02:23, 204.59it/s]

finished frames 9823200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1637322/1666666 [2:30:57<02:27, 198.59it/s]

finished frames 9823800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1637429/1666666 [2:30:57<02:21, 206.87it/s]

finished frames 9824400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1637537/1666666 [2:30:58<02:18, 209.68it/s]

finished frames 9825000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1637625/1666666 [2:30:58<02:18, 209.43it/s]

finished frames 9825600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1637734/1666666 [2:30:59<02:18, 209.52it/s]

finished frames 9826200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1637840/1666666 [2:30:59<02:17, 209.07it/s]

finished frames 9826800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1637926/1666666 [2:31:00<02:16, 209.93it/s]

finished frames 9827400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1638031/1666666 [2:31:00<02:20, 204.42it/s]

finished frames 9828000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1638136/1666666 [2:31:01<02:17, 207.06it/s]

finished frames 9828600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1638241/1666666 [2:31:01<02:16, 207.95it/s]

finished frames 9829200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1638325/1666666 [2:31:02<02:15, 208.58it/s]

finished frames 9829800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1638431/1666666 [2:31:02<02:15, 208.10it/s]

finished frames 9830400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1638537/1666666 [2:31:03<02:15, 207.32it/s]

finished frames 9831000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1638642/1666666 [2:31:03<02:15, 206.26it/s]

finished frames 9831600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1638726/1666666 [2:31:04<02:15, 206.39it/s]

finished frames 9832200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1638831/1666666 [2:31:04<02:15, 205.03it/s]

finished frames 9832800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1638936/1666666 [2:31:05<02:14, 206.03it/s]

finished frames 9833400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1639020/1666666 [2:31:05<02:18, 200.14it/s]

finished frames 9834000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1639125/1666666 [2:31:06<02:14, 204.76it/s]

finished frames 9834600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1639230/1666666 [2:31:06<02:13, 205.11it/s]

finished frames 9835200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1639335/1666666 [2:31:07<02:12, 206.05it/s]

finished frames 9835800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1639440/1666666 [2:31:07<02:12, 206.14it/s]

finished frames 9836400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1639525/1666666 [2:31:08<02:15, 200.42it/s]

finished frames 9837000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1639633/1666666 [2:31:08<02:20, 192.51it/s]

finished frames 9837600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1639742/1666666 [2:31:09<02:09, 207.32it/s]

finished frames 9838200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1639830/1666666 [2:31:09<02:07, 211.12it/s]

finished frames 9838800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1639940/1666666 [2:31:10<02:06, 210.92it/s]

finished frames 9839400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1640027/1666666 [2:31:10<02:08, 207.12it/s]

finished frames 9840000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1640136/1666666 [2:31:11<02:06, 209.90it/s]

finished frames 9840600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1640224/1666666 [2:31:11<02:05, 210.01it/s]

finished frames 9841200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1640334/1666666 [2:31:11<02:02, 214.19it/s]

finished frames 9841800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1640422/1666666 [2:31:12<02:03, 213.17it/s]

finished frames 9842400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1640532/1666666 [2:31:12<02:03, 212.29it/s]

finished frames 9843000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1640642/1666666 [2:31:13<02:03, 211.12it/s]

finished frames 9843600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1640730/1666666 [2:31:13<02:02, 211.51it/s]

finished frames 9844200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1640840/1666666 [2:31:14<02:01, 211.88it/s]

finished frames 9844800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1640928/1666666 [2:31:14<02:01, 212.33it/s]

finished frames 9845400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1641038/1666666 [2:31:15<02:03, 207.33it/s]

finished frames 9846000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1641126/1666666 [2:31:15<02:01, 210.13it/s]

finished frames 9846600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1641236/1666666 [2:31:16<02:00, 211.63it/s]

finished frames 9847200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1641324/1666666 [2:31:16<01:59, 211.66it/s]

finished frames 9847800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1641434/1666666 [2:31:17<01:58, 212.77it/s]

finished frames 9848400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1641522/1666666 [2:31:17<01:58, 212.87it/s]

finished frames 9849000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 98%|█████████▊| 1641632/1666666 [2:31:18<01:57, 212.84it/s]

finished frames 9849600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▊| 1641742/1666666 [2:31:18<01:57, 211.78it/s]

finished frames 9850200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▊| 1641830/1666666 [2:31:19<01:57, 210.65it/s]

finished frames 9850800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▊| 1641940/1666666 [2:31:19<01:58, 208.35it/s]

finished frames 9851400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▊| 1642024/1666666 [2:31:20<02:06, 194.28it/s]

finished frames 9852000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▊| 1642133/1666666 [2:31:20<01:57, 207.98it/s]

finished frames 9852600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▊| 1642243/1666666 [2:31:21<01:55, 211.04it/s]

finished frames 9853200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▊| 1642331/1666666 [2:31:21<01:54, 212.95it/s]

finished frames 9853800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▊| 1642441/1666666 [2:31:22<01:54, 211.43it/s]

finished frames 9854400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▊| 1642529/1666666 [2:31:22<01:54, 211.20it/s]

finished frames 9855000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▊| 1642639/1666666 [2:31:22<01:53, 212.27it/s]

finished frames 9855600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▊| 1642727/1666666 [2:31:23<01:53, 211.71it/s]

finished frames 9856200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▊| 1642837/1666666 [2:31:23<01:52, 211.60it/s]

finished frames 9856800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▊| 1642925/1666666 [2:31:24<01:51, 212.79it/s]

finished frames 9857400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▊| 1643034/1666666 [2:31:24<01:53, 207.36it/s]

finished frames 9858000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▊| 1643140/1666666 [2:31:25<01:53, 207.61it/s]

finished frames 9858600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▊| 1643228/1666666 [2:31:25<01:51, 209.81it/s]

finished frames 9859200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▊| 1643333/1666666 [2:31:26<01:54, 203.79it/s]

finished frames 9859800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▊| 1643438/1666666 [2:31:26<01:55, 201.63it/s]

finished frames 9860400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▊| 1643522/1666666 [2:31:27<01:54, 202.42it/s]

finished frames 9861000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▊| 1643630/1666666 [2:31:27<01:50, 209.17it/s]

finished frames 9861600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▊| 1643740/1666666 [2:31:28<01:48, 212.00it/s]

finished frames 9862200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▊| 1643828/1666666 [2:31:28<01:48, 211.29it/s]

finished frames 9862800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▊| 1643937/1666666 [2:31:29<01:49, 208.40it/s]

finished frames 9863400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▊| 1644021/1666666 [2:31:29<01:53, 199.40it/s]

finished frames 9864000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▊| 1644129/1666666 [2:31:30<01:47, 209.76it/s]

finished frames 9864600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▊| 1644238/1666666 [2:31:30<01:50, 202.97it/s]

finished frames 9865200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▊| 1644326/1666666 [2:31:31<02:04, 179.29it/s]

finished frames 9865800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▊| 1644435/1666666 [2:31:31<01:47, 205.84it/s]

finished frames 9866400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▊| 1644523/1666666 [2:31:32<01:45, 209.78it/s]

finished frames 9867000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▊| 1644633/1666666 [2:31:32<01:43, 212.58it/s]

finished frames 9867600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▊| 1644743/1666666 [2:31:33<01:43, 211.32it/s]

finished frames 9868200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▊| 1644831/1666666 [2:31:33<01:43, 210.43it/s]

finished frames 9868800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▊| 1644941/1666666 [2:31:34<01:43, 210.79it/s]

finished frames 9869400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▊| 1645028/1666666 [2:31:34<01:44, 206.47it/s]

finished frames 9870000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▊| 1645136/1666666 [2:31:34<01:42, 210.01it/s]

finished frames 9870600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▊| 1645224/1666666 [2:31:35<01:41, 210.81it/s]

finished frames 9871200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▊| 1645334/1666666 [2:31:35<01:40, 212.01it/s]

finished frames 9871800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▊| 1645444/1666666 [2:31:36<01:39, 213.64it/s]

finished frames 9872400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▊| 1645532/1666666 [2:31:36<01:39, 213.47it/s]

finished frames 9873000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▊| 1645642/1666666 [2:31:37<01:38, 213.02it/s]

finished frames 9873600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▊| 1645730/1666666 [2:31:37<01:38, 213.30it/s]

finished frames 9874200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1645840/1666666 [2:31:38<01:38, 211.84it/s]

finished frames 9874800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1645928/1666666 [2:31:38<01:37, 211.74it/s]

finished frames 9875400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1646037/1666666 [2:31:39<01:39, 206.50it/s]

finished frames 9876000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1646125/1666666 [2:31:39<01:37, 209.97it/s]

finished frames 9876600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1646235/1666666 [2:31:40<01:37, 210.15it/s]

finished frames 9877200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1646323/1666666 [2:31:40<01:36, 210.66it/s]

finished frames 9877800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1646433/1666666 [2:31:41<01:36, 210.74it/s]

finished frames 9878400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1646543/1666666 [2:31:41<01:35, 210.53it/s]

finished frames 9879000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1646630/1666666 [2:31:42<01:36, 208.36it/s]

finished frames 9879600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1646736/1666666 [2:31:42<01:41, 196.27it/s]

finished frames 9880200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1646824/1666666 [2:31:43<01:35, 208.00it/s]

finished frames 9880800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1646933/1666666 [2:31:43<01:33, 210.54it/s]

finished frames 9881400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1647021/1666666 [2:31:43<01:35, 205.45it/s]

finished frames 9882000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1647130/1666666 [2:31:44<01:31, 213.34it/s]

finished frames 9882600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1647240/1666666 [2:31:45<01:30, 213.58it/s]

finished frames 9883200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1647328/1666666 [2:31:45<01:29, 215.08it/s]

finished frames 9883800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1647438/1666666 [2:31:45<01:29, 213.66it/s]

finished frames 9884400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1647526/1666666 [2:31:46<01:29, 214.40it/s]

finished frames 9885000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1647636/1666666 [2:31:46<01:29, 213.81it/s]

finished frames 9885600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1647724/1666666 [2:31:47<01:28, 213.49it/s]

finished frames 9886200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1647834/1666666 [2:31:47<01:28, 213.34it/s]

finished frames 9886800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1647944/1666666 [2:31:48<01:27, 214.61it/s]

finished frames 9887400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1648032/1666666 [2:31:48<01:28, 210.04it/s]

finished frames 9888000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1648142/1666666 [2:31:49<01:27, 212.52it/s]

finished frames 9888600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1648230/1666666 [2:31:49<01:26, 213.92it/s]

finished frames 9889200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1648340/1666666 [2:31:50<01:25, 215.07it/s]

finished frames 9889800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1648428/1666666 [2:31:50<01:24, 215.53it/s]

finished frames 9890400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1648538/1666666 [2:31:51<01:24, 213.29it/s]

finished frames 9891000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1648626/1666666 [2:31:51<01:24, 213.84it/s]

finished frames 9891600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1648736/1666666 [2:31:52<01:24, 212.60it/s]

finished frames 9892200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1648824/1666666 [2:31:52<01:23, 213.52it/s]

finished frames 9892800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1648934/1666666 [2:31:52<01:22, 214.14it/s]

finished frames 9893400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1649021/1666666 [2:31:53<01:28, 199.53it/s]

finished frames 9894000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1649131/1666666 [2:31:53<01:24, 208.01it/s]

finished frames 9894600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1649239/1666666 [2:31:54<01:22, 210.79it/s]

finished frames 9895200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1649327/1666666 [2:31:54<01:22, 211.22it/s]

finished frames 9895800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1649437/1666666 [2:31:55<01:21, 210.53it/s]

finished frames 9896400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1649523/1666666 [2:31:55<01:21, 209.44it/s]

finished frames 9897000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1649631/1666666 [2:31:56<01:20, 211.15it/s]

finished frames 9897600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1649741/1666666 [2:31:56<01:19, 213.99it/s]

finished frames 9898200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1649829/1666666 [2:31:57<01:18, 213.82it/s]

finished frames 9898800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1649939/1666666 [2:31:57<01:18, 213.43it/s]

finished frames 9899400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1650027/1666666 [2:31:58<01:19, 208.67it/s]

finished frames 9900000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1650137/1666666 [2:31:58<01:18, 211.60it/s]

finished frames 9900600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1650225/1666666 [2:31:59<01:16, 214.53it/s]

finished frames 9901200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1650335/1666666 [2:31:59<01:17, 211.99it/s]

finished frames 9901800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1650423/1666666 [2:32:00<01:16, 213.09it/s]

finished frames 9902400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1650533/1666666 [2:32:00<01:16, 210.69it/s]

finished frames 9903000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1650643/1666666 [2:32:01<01:14, 216.25it/s]

finished frames 9903600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1650731/1666666 [2:32:01<01:13, 218.14it/s]

finished frames 9904200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1650841/1666666 [2:32:01<01:12, 218.40it/s]

finished frames 9904800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1650929/1666666 [2:32:02<01:12, 218.02it/s]

finished frames 9905400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1651039/1666666 [2:32:02<01:14, 210.32it/s]

finished frames 9906000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1651127/1666666 [2:32:03<01:12, 213.06it/s]

finished frames 9906600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1651237/1666666 [2:32:03<01:12, 214.01it/s]

finished frames 9907200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1651325/1666666 [2:32:04<01:11, 214.08it/s]

finished frames 9907800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1651432/1666666 [2:32:04<01:14, 205.40it/s]

finished frames 9908400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1651540/1666666 [2:32:05<01:13, 205.64it/s]

finished frames 9909000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1651628/1666666 [2:32:05<01:11, 209.13it/s]

finished frames 9909600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1651738/1666666 [2:32:06<01:10, 211.32it/s]

finished frames 9910200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1651826/1666666 [2:32:06<01:09, 212.80it/s]

finished frames 9910800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1651936/1666666 [2:32:07<01:09, 212.07it/s]

finished frames 9911400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1652023/1666666 [2:32:07<01:10, 207.51it/s]

finished frames 9912000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1652132/1666666 [2:32:08<01:08, 211.29it/s]

finished frames 9912600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1652242/1666666 [2:32:08<01:08, 209.92it/s]

finished frames 9913200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1652327/1666666 [2:32:09<01:08, 207.84it/s]

finished frames 9913800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1652432/1666666 [2:32:09<01:09, 206.14it/s]

finished frames 9914400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1652537/1666666 [2:32:10<01:08, 206.21it/s]

finished frames 9915000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1652642/1666666 [2:32:10<01:08, 206.05it/s]

finished frames 9915600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1652726/1666666 [2:32:10<01:07, 207.14it/s]

finished frames 9916200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1652831/1666666 [2:32:11<01:07, 206.31it/s]

finished frames 9916800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1652936/1666666 [2:32:11<01:06, 205.59it/s]

finished frames 9917400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1653020/1666666 [2:32:12<01:08, 200.33it/s]

finished frames 9918000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1653125/1666666 [2:32:12<01:05, 206.39it/s]

finished frames 9918600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1653230/1666666 [2:32:13<01:05, 206.60it/s]

finished frames 9919200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1653335/1666666 [2:32:13<01:04, 206.29it/s]

finished frames 9919800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1653440/1666666 [2:32:14<01:04, 205.81it/s]

finished frames 9920400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1653524/1666666 [2:32:14<01:03, 206.69it/s]

finished frames 9921000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1653629/1666666 [2:32:15<01:03, 205.53it/s]

finished frames 9921600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1653734/1666666 [2:32:15<01:04, 199.59it/s]

finished frames 9922200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1653839/1666666 [2:32:16<01:02, 206.54it/s]

finished frames 9922800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1653927/1666666 [2:32:16<01:00, 211.61it/s]

finished frames 9923400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1654036/1666666 [2:32:17<01:00, 208.25it/s]

finished frames 9924000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1654123/1666666 [2:32:17<00:59, 209.55it/s]

finished frames 9924600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1654233/1666666 [2:32:18<00:59, 210.60it/s]

finished frames 9925200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1654339/1666666 [2:32:18<00:59, 207.61it/s]

finished frames 9925800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1654423/1666666 [2:32:19<00:59, 206.62it/s]

finished frames 9926400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1654532/1666666 [2:32:19<00:56, 214.69it/s]

finished frames 9927000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1654642/1666666 [2:32:20<00:55, 215.37it/s]

finished frames 9927600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1654730/1666666 [2:32:20<00:55, 213.79it/s]

finished frames 9928200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1654840/1666666 [2:32:21<00:55, 214.05it/s]

finished frames 9928800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1654928/1666666 [2:32:21<00:54, 213.60it/s]

finished frames 9929400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1655037/1666666 [2:32:22<00:55, 208.61it/s]

finished frames 9930000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1655125/1666666 [2:32:22<00:54, 212.79it/s]

finished frames 9930600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1655235/1666666 [2:32:22<00:53, 213.27it/s]

finished frames 9931200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1655323/1666666 [2:32:23<00:53, 211.73it/s]

finished frames 9931800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1655433/1666666 [2:32:23<00:53, 211.85it/s]

finished frames 9932400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1655543/1666666 [2:32:24<00:52, 211.42it/s]

finished frames 9933000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1655631/1666666 [2:32:24<00:52, 211.41it/s]

finished frames 9933600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1655741/1666666 [2:32:25<00:51, 212.11it/s]

finished frames 9934200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1655829/1666666 [2:32:25<00:51, 211.57it/s]

finished frames 9934800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1655939/1666666 [2:32:26<00:50, 211.51it/s]

finished frames 9935400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1656026/1666666 [2:32:26<00:51, 206.88it/s]

finished frames 9936000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1656134/1666666 [2:32:27<00:51, 206.40it/s]

finished frames 9936600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1656242/1666666 [2:32:27<00:51, 204.33it/s]

finished frames 9937200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1656330/1666666 [2:32:28<00:48, 211.08it/s]

finished frames 9937800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1656440/1666666 [2:32:28<00:48, 211.67it/s]

finished frames 9938400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1656528/1666666 [2:32:29<00:47, 212.67it/s]

finished frames 9939000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1656638/1666666 [2:32:29<00:47, 212.01it/s]

finished frames 9939600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1656726/1666666 [2:32:30<00:46, 213.04it/s]

finished frames 9940200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1656836/1666666 [2:32:30<00:46, 212.64it/s]

finished frames 9940800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1656924/1666666 [2:32:31<00:45, 212.46it/s]

finished frames 9941400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1657033/1666666 [2:32:31<00:46, 208.05it/s]

finished frames 9942000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1657142/1666666 [2:32:32<00:45, 210.99it/s]

finished frames 9942600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1657230/1666666 [2:32:32<00:44, 211.38it/s]

finished frames 9943200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1657340/1666666 [2:32:32<00:43, 212.03it/s]

finished frames 9943800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1657427/1666666 [2:32:33<00:44, 207.94it/s]

finished frames 9944400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1657534/1666666 [2:32:33<00:43, 209.05it/s]

finished frames 9945000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1657622/1666666 [2:32:34<00:42, 211.71it/s]

finished frames 9945600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1657732/1666666 [2:32:34<00:41, 214.70it/s]

finished frames 9946200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1657842/1666666 [2:32:35<00:41, 213.84it/s]

finished frames 9946800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1657930/1666666 [2:32:35<00:41, 212.74it/s]

finished frames 9947400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1658040/1666666 [2:32:36<00:41, 207.40it/s]

finished frames 9948000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1658128/1666666 [2:32:36<00:40, 211.86it/s]

finished frames 9948600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1658238/1666666 [2:32:37<00:39, 212.96it/s]

finished frames 9949200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


 99%|█████████▉| 1658326/1666666 [2:32:37<00:39, 213.59it/s]

finished frames 9949800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1658436/1666666 [2:32:38<00:38, 213.12it/s]

finished frames 9950400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1658524/1666666 [2:32:38<00:38, 213.30it/s]

finished frames 9951000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1658634/1666666 [2:32:39<00:37, 211.51it/s]

finished frames 9951600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1658741/1666666 [2:32:39<00:38, 208.05it/s]

finished frames 9952200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1658825/1666666 [2:32:40<00:37, 207.92it/s]

finished frames 9952800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1658930/1666666 [2:32:40<00:37, 207.70it/s]

finished frames 9953400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1659035/1666666 [2:32:41<00:37, 201.72it/s]

finished frames 9954000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1659140/1666666 [2:32:41<00:36, 206.55it/s]

finished frames 9954600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1659224/1666666 [2:32:41<00:35, 207.70it/s]

finished frames 9955200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1659329/1666666 [2:32:42<00:35, 207.68it/s]

finished frames 9955800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1659434/1666666 [2:32:42<00:34, 206.71it/s]

finished frames 9956400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1659540/1666666 [2:32:43<00:34, 208.19it/s]

finished frames 9957000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1659624/1666666 [2:32:43<00:33, 208.53it/s]

finished frames 9957600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1659729/1666666 [2:32:44<00:33, 208.45it/s]

finished frames 9958200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1659835/1666666 [2:32:44<00:32, 207.89it/s]

finished frames 9958800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1659940/1666666 [2:32:45<00:32, 208.42it/s]

finished frames 9959400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1660025/1666666 [2:32:45<00:32, 202.94it/s]

finished frames 9960000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1660130/1666666 [2:32:46<00:31, 205.32it/s]

finished frames 9960600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1660235/1666666 [2:32:46<00:31, 206.21it/s]

finished frames 9961200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1660340/1666666 [2:32:47<00:30, 204.96it/s]

finished frames 9961800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1660424/1666666 [2:32:47<00:30, 206.50it/s]

finished frames 9962400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1660529/1666666 [2:32:48<00:29, 205.97it/s]

finished frames 9963000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1660634/1666666 [2:32:48<00:29, 204.10it/s]

finished frames 9963600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1660742/1666666 [2:32:49<00:28, 209.54it/s]

finished frames 9964200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1660826/1666666 [2:32:49<00:28, 207.76it/s]

finished frames 9964800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1660931/1666666 [2:32:50<00:28, 197.97it/s]

finished frames 9965400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1661036/1666666 [2:32:50<00:27, 201.77it/s]

finished frames 9966000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1661141/1666666 [2:32:51<00:26, 206.28it/s]

finished frames 9966600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1661229/1666666 [2:32:51<00:25, 211.04it/s]

finished frames 9967200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1661339/1666666 [2:32:52<00:25, 211.13it/s]

finished frames 9967800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1661427/1666666 [2:32:52<00:24, 210.43it/s]

finished frames 9968400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1661537/1666666 [2:32:53<00:24, 210.80it/s]

finished frames 9969000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1661625/1666666 [2:32:53<00:23, 210.15it/s]

finished frames 9969600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1661735/1666666 [2:32:54<00:23, 211.01it/s]

finished frames 9970200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1661823/1666666 [2:32:54<00:22, 212.83it/s]

finished frames 9970800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1661933/1666666 [2:32:54<00:22, 212.85it/s]

finished frames 9971400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1662021/1666666 [2:32:55<00:22, 205.47it/s]

finished frames 9972000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1662131/1666666 [2:32:55<00:21, 212.66it/s]

finished frames 9972600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1662241/1666666 [2:32:56<00:20, 213.23it/s]

finished frames 9973200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1662329/1666666 [2:32:56<00:20, 214.34it/s]

finished frames 9973800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1662439/1666666 [2:32:57<00:19, 214.33it/s]

finished frames 9974400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1662527/1666666 [2:32:57<00:19, 214.32it/s]

finished frames 9975000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1662637/1666666 [2:32:58<00:18, 213.15it/s]

finished frames 9975600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1662725/1666666 [2:32:58<00:18, 214.15it/s]

finished frames 9976200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1662835/1666666 [2:32:59<00:17, 213.69it/s]

finished frames 9976800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1662923/1666666 [2:32:59<00:17, 214.62it/s]

finished frames 9977400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1663033/1666666 [2:33:00<00:17, 209.16it/s]

finished frames 9978000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1663143/1666666 [2:33:00<00:16, 214.01it/s]

finished frames 9978600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1663231/1666666 [2:33:01<00:16, 207.03it/s]

finished frames 9979200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1663341/1666666 [2:33:01<00:15, 212.59it/s]

finished frames 9979800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1663429/1666666 [2:33:02<00:15, 214.34it/s]

finished frames 9980400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1663539/1666666 [2:33:02<00:14, 213.64it/s]

finished frames 9981000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1663627/1666666 [2:33:02<00:14, 214.32it/s]

finished frames 9981600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1663737/1666666 [2:33:03<00:13, 213.80it/s]

finished frames 9982200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1663825/1666666 [2:33:03<00:13, 214.13it/s]

finished frames 9982800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1663935/1666666 [2:33:04<00:12, 211.56it/s]

finished frames 9983400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1664022/1666666 [2:33:04<00:12, 206.81it/s]

finished frames 9984000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1664132/1666666 [2:33:05<00:12, 210.22it/s]

finished frames 9984600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1664242/1666666 [2:33:05<00:11, 211.93it/s]

finished frames 9985200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1664330/1666666 [2:33:06<00:11, 212.12it/s]

finished frames 9985800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1664440/1666666 [2:33:06<00:10, 211.52it/s]

finished frames 9986400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1664528/1666666 [2:33:07<00:10, 210.48it/s]

finished frames 9987000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1664638/1666666 [2:33:07<00:09, 211.63it/s]

finished frames 9987600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1664726/1666666 [2:33:08<00:09, 211.15it/s]

finished frames 9988200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1664836/1666666 [2:33:08<00:08, 211.06it/s]

finished frames 9988800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1664924/1666666 [2:33:09<00:08, 211.33it/s]

finished frames 9989400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1665033/1666666 [2:33:09<00:07, 206.50it/s]

finished frames 9990000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1665142/1666666 [2:33:10<00:07, 210.54it/s]

finished frames 9990600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1665230/1666666 [2:33:10<00:06, 211.50it/s]

finished frames 9991200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1665340/1666666 [2:33:11<00:06, 215.45it/s]

finished frames 9991800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1665428/1666666 [2:33:11<00:05, 215.45it/s]

finished frames 9992400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1665516/1666666 [2:33:11<00:05, 213.72it/s]

finished frames 9993000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1665626/1666666 [2:33:12<00:05, 207.92it/s]

finished frames 9993600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1665736/1666666 [2:33:12<00:04, 210.60it/s]

finished frames 9994200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1665824/1666666 [2:33:13<00:03, 210.89it/s]

finished frames 9994800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1665934/1666666 [2:33:13<00:03, 211.32it/s]

finished frames 9995400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1666022/1666666 [2:33:14<00:03, 202.70it/s]

finished frames 9996000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1666131/1666666 [2:33:14<00:02, 209.28it/s]

finished frames 9996600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1666236/1666666 [2:33:15<00:02, 208.37it/s]

finished frames 9997200, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1666341/1666666 [2:33:15<00:01, 207.89it/s]

finished frames 9997800, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1666425/1666666 [2:33:16<00:01, 208.65it/s]

finished frames 9998400, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1666531/1666666 [2:33:16<00:00, 208.28it/s]

finished frames 9999000, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|█████████▉| 1666641/1666666 [2:33:17<00:00, 210.08it/s]

finished frames 9999600, mean/median reward -2.6/-2.7, min/max reward -2.7/-2.5


100%|██████████| 1666666/1666666 [2:33:17<00:00, 181.21it/s]


後処理

# 実行する

In [17]:
# エージェントが持つ頭脳となるクラスを定義、全エージェントで共有する


class Brain_play(object):
    '''Actor-critic "brain" shared by all agents, restored from saved weights.

    The constructor loads a state dict from ``filename`` into ``actor_critic``
    and attaches an RMSprop optimizer.  Both methods rely on module-level
    hyperparameters (``lr``, ``eps``, ``alpha``, ``NUM_ADVANCED_STEP``,
    ``NUM_PROCESSES``, ``value_loss_coef``, ``entropy_coef``,
    ``max_grad_norm``) being defined before use.
    '''

    def __init__(self, actor_critic,filename='weight_end.pth'):
        # The network whose parameters are restored and later updated.
        self.actor_critic = actor_critic

        # Restore the saved parameters; tensors are mapped to the CPU on load.
        saved_state = torch.load(filename, map_location='cpu')
        self.actor_critic.load_state_dict(saved_state)

        # Gradient-based optimizer used by update().
        self.optimizer = optim.RMSprop(
            actor_critic.parameters(), lr=lr, eps=eps, alpha=alpha)

    def update(self, rollouts):
        '''Apply one gradient step using every advanced step stored in ``rollouts``.'''
        obs_shape = rollouts.observations.size()[2:]

        # Flatten the (step, process) axes so the whole rollout is evaluated at once;
        # the last stored observation is excluded.
        flat_observations = rollouts.observations[:-1].view(-1, *obs_shape)
        flat_actions = rollouts.actions.view(-1, 1)
        values, action_log_probs, dist_entropy = self.actor_critic.evaluate_actions(
            flat_observations, flat_actions)

        # Restore the (step, process, 1) layout expected by the returns tensor.
        per_step_shape = (NUM_ADVANCED_STEP, NUM_PROCESSES, 1)
        values = values.view(*per_step_shape)
        action_log_probs = action_log_probs.view(*per_step_shape)

        advantages = rollouts.returns[:-1] - values
        value_loss = advantages.pow(2).mean()

        # The advantages are detached so they act as constants in the policy term.
        action_gain = (advantages.detach() * action_log_probs).mean()

        total_loss = (value_loss * value_loss_coef -
                      action_gain - dist_entropy * entropy_coef)

        self.optimizer.zero_grad()  # reset accumulated gradients
        total_loss.backward()  # back-propagate
        # Clip the gradient norm to max_grad_norm so one step cannot change the
        # parameters too abruptly.
        nn.utils.clip_grad_norm_(self.actor_critic.parameters(), max_grad_norm)

        self.optimizer.step()  # update the parameters


In [18]:
# 実行用の関数

NUM_PROCESSES = 1

# Fluid (CFD) version: run the restored agent in a single "play" environment.

# Seed the random number generators.
seed_num = 1
torch.manual_seed(seed_num)
if use_cuda:
    torch.cuda.manual_seed(seed_num)

# Build the execution environment.
# NOTE(review): seed_num (1) is reused here as the torch thread count.
torch.set_num_threads(seed_num)
Env_play = makecase(NUM_PROCESSES, casename='play',stride=STRIDE, end=END,
                xCells=XCELLS, insert_list = INSERT_LIST)

# Create the brain shared by all agents.
n_out = Env_play[0].action_space.shape[0]  # number of rows of action_space
actor_critic = Net(n_out).to(device)  # move the network to the selected device
global_brain = Brain_play(actor_critic)

# Storage variables.
obs_shape = Env_play[0].observation_space.shape
# No frame stacking is used: current_obs stores the observation as-is.

current_obs = torch.zeros(NUM_PROCESSES, *obs_shape).to(device)
rollouts = RolloutStorage(
    NUM_ADVANCED_STEP, NUM_PROCESSES, obs_shape)  # rollout storage object
episode_rewards = torch.zeros([NUM_PROCESSES, 1])  # reward sum of the running episode
final_rewards = torch.zeros([NUM_PROCESSES, 1])  # reward sum of the last finished episode

# Start from the initial state.
obs = resets(Env_play)
obs = torch.from_numpy(obs).float()
current_obs = obs.to(device)  # latest observation

# Store the current state as the first observation of the rollout.
rollouts.observations[0].copy_(current_obs)

frames = []
main_end = False
# Execution loop.
for j in tqdm(range(NUM_UPDATES)):
    # Stop once an environment reported done or a failed run in the previous update.
    if main_end:
        break

    for step in range(NUM_ADVANCED_STEP):

        # Choose the action.
        with torch.no_grad():
            action = actor_critic.act(rollouts.observations[step])

        cpu_actions = action.squeeze(1).cpu().numpy()  # tensor -> NumPy

        # Advance every environment by one step.
        obs, reward, done, runOK = steps(Env_play, cpu_actions)

        frames.append(obs)

        # done / runOK are per-environment sequences (they are zipped below), so
        # they must be reduced explicitly: a non-empty list is always truthy and
        # would otherwise set the stop flag on the very first step.
        if np.any(done) or not np.all(runOK):
            main_end = True
        # Convert the rewards to a tensor of shape (NUM_PROCESSES, 1) and add them
        # to the running episode total.
        reward = np.expand_dims(np.stack(reward), 1)
        reward = torch.from_numpy(reward).float()
        episode_rewards += reward

        # Per environment: mask is 0 when finished (or the run failed), 1 otherwise.
        masks = torch.FloatTensor(
            [[0.0] if done_ or not runOK_ else [1.0] for done_, runOK_ in zip(done,runOK)])
        # Update the reward of the last finished episode: multiplying by the mask
        # keeps it while running and resets it to 0 when done ...
        final_rewards *= masks
        # ... then add 0 while running, or the episode total when done.
        final_rewards += (1 - masks) * episode_rewards


        # Reset the running episode total for finished environments.
        episode_rewards *= masks

        # Move the masks to the selected device.
        masks = masks.to(device)

        # Zero the current observation of finished environments
        # (mask reshaped from (N, 1) to (N, 1, 1, 1) for broadcasting).
        current_obs *= masks.unsqueeze(2).unsqueeze(2)

        # Replace current_obs with the newest observation.
        obs = torch.from_numpy(obs).float()
        current_obs = obs.to(device)

        # Insert this step's transition into the rollout storage.
        rollouts.insert(current_obs, action.data, reward, masks)

    # End of the advanced-step loop.

    # Estimate the state value of the last stored observation.
    with torch.no_grad():
        next_value = actor_critic.get_value(
            rollouts.observations[-1]).detach()


    # Compute the discounted returns for every step and store them in rollouts.
    rollouts.compute_returns(next_value)


    # Update the network and the rollout storage.
    global_brain.update(rollouts)
    rollouts.after_update()
    


  0%|          | 1/1666666 [03:13<89688:40:53, 193.73s/it]

In [125]:
## この環境の部分を変える

class Aircond:
    '''Aircondのクラス'''
    def __init__(self, CASE, stride=500,end=3000,xCells=40,
                         insert_list = [15,15,15,15,33,33,33,51,69,69,69,87,105,105,105,142,142,142,342,342,380,380]):
        '''Set up the environment around an OpenFOAM case directory.

        CASE : case object exposing ``name`` and ``initialDir()``
        stride : width of one step
        end : time at which the run ends
        xCells : number of cells in the x direction
        insert_list : row indices at which zero rows are inserted into the
            observation (obstacle cells)
        '''
        self.CASE = CASE
        # The mesh has to be generated first, otherwise polyMesh does not exist.
        os.system(CASE.name + '/Makemesh')
        # Read the cell count from the text following "nCells" in the neighbour file.
        with open(self.CASE.name + '/constant/polyMesh/neighbour') as mesh_file:
            mesh_text = mesh_file.read()
        marker = mesh_text.find('nCells')
        self.nCells = int(re.sub(r'\D', '', mesh_text[marker : marker+15]))

        # Discrete action components; action_space has one row per combination.
        self.action_SPEED = np.array([0.1,0.3,0.5])
        self.action_DIRECTION = np.array([-1*np.pi/8, -2*np.pi/8,-3*np.pi/8])
        self.action_TEMPERTURE = np.array([18+273.15,22+273.15,26+273.15])
        zero_triplet = np.array([0,0,0])
        self.action_space = np.tile(zero_triplet,(27,1))
        self.observation_space_ = np.tile(zero_triplet,(self.nCells,1))

        self.xCells = xCells
        self.insert_list = insert_list
        # Observation template: three channels, each reshaped to xCells rows after
        # padding with one row per entry of insert_list.
        padded = np.tile(zero_triplet, (self.nCells+len(self.insert_list),1))
        self.observation_space = np.array(
            [padded[:,channel].reshape(self.xCells,-1) for channel in range(3)])

        self.stride = stride  # width of one step
        # Time elapsed since stepping started (steps taken before that are not counted).
        self.present_time = 0
        # Times as written in OpenFOAM's controlDict.
        self.startTime = 0
        self.endTime = copy(self.stride)
        # When to finish.
        self.end = end

        # Locate the dictionary directories of the case.
        self.initialDir = self.CASE.initialDir()+'/'
        self.constant = self.CASE.name + "/constant/"
        self.system = self.CASE.name + "/system/"
        self.initialDir_file = [entry for entry in os.listdir(self.initialDir)
                                if os.path.isfile(self.initialDir + entry)]
        self.constant_file = [entry for entry in os.listdir(self.constant)
                              if os.path.isfile(self.constant + entry)]
        self.system_file = [entry for entry in os.listdir(self.system)
                            if os.path.isfile(self.system + entry)]

        # Expose every parsed dictionary file as an attribute named after the file.
        for file_name in self.initialDir_file:
            self.__dict__[file_name] = ParsedParameterFile(self.initialDir + file_name)

        for file_name in self.system_file:
            self.__dict__[file_name] = ParsedParameterFile(self.system + file_name)
            
    def initial_to_float(self, numpy_Parsed_value):
        '''Convert a ``uniform ...`` field entry into a 1-D float array.

        numpy_Parsed_value : a value whose string form is ``uniform <v>`` or
            ``uniform (<v1> <v2> ...)`` (optionally already wrapped in a 0-d
            array), or an array-like that already holds the values.

        Returns the values as ``np.array`` of floats.  An input that is not 0-d
        is returned as an array unchanged (it previously raised
        UnboundLocalError).
        '''
        parsed = np.array(numpy_Parsed_value)
        if parsed.ndim != 0:
            return parsed
        # .item() unwraps the single element of the 0-d array.  The former
        # .all() only returned the element itself on NumPy < 2.0; newer
        # versions return a bool for object arrays.
        raw_text = str(parsed.item())
        # Drop the leading "uniform " (8 characters) and the vector parentheses.
        tokens = raw_text[8:].strip('()').split(' ')
        return np.array(list(map(float, tokens)))
    
    def initial_to_array(self, numpy_Parsed_value):
        '''uniformをnCellの数だけnp.arrayに変換'''
        numpy_Parsed_value = np.array(numpy_Parsed_value)
        if numpy_Parsed_value.ndim==0:
            Parsed_raw = str(numpy_Parsed_value.all())
            Parsed_str = Parsed_raw[8:].strip('()').split(' ')
            Parsed_int = np.array(list(map(float,Parsed_str)))
            Parsed = np.tile(Parsed_int,(self.nCells,1))
        return Parsed

    def make_observation_old(self,Dir):
        '''Read U and T under ``Dir`` and return them as columns (Ux, Uy, T).'''
        velocity = np.array(ParsedParameterFile(Dir + '/U').content['internalField'])
        temperature = np.array(ParsedParameterFile(Dir + '/T').content['internalField'])
        # A 0-d U means a "uniform" entry; both fields are then expanded per cell.
        if velocity.ndim == 0:
            velocity = self.initial_to_array(velocity)
            temperature = self.initial_to_array(temperature)
        # Remove the third velocity component and turn T into a single column.
        planar_velocity = np.delete(velocity, 2, axis=1)
        temperature_column = np.reshape(temperature, [-1,1], order='F')
        return np.concatenate([planar_velocity, temperature_column],axis=1)
    
    def make_observation_onerow(self,Dir):
        '''Read U and T under ``Dir`` and return them as one long column.

        The result stacks all Ux values, then all Uy values, then all T values.
        '''
        velocity = np.array(ParsedParameterFile(Dir + '/U').content['internalField'])
        temperature = np.array(ParsedParameterFile(Dir + '/T').content['internalField'])
        # A 0-d U means a "uniform" entry; both fields are then expanded per cell.
        if velocity.ndim == 0:
            velocity = self.initial_to_array(velocity)
            temperature = self.initial_to_array(temperature)
        temperature_column = temperature.reshape(-1, 1)
        columns = [velocity[:,0].reshape(-1, 1),
                   velocity[:,1].reshape(-1, 1),
                   temperature_column]
        return np.concatenate(columns, axis=0)
    
    def make_observation(self,Dir,celsius=True):
        '''Read U and T under ``Dir`` and return a 3-channel 2-D observation.

        Rows of zeros are inserted where obstacles are.
        self.xCells : number of cells in the x direction
        self.insert_list : row positions that must be filled with zeros
        celsius : when true, T is converted to degrees Celsius first
        '''
        velocity = np.array(ParsedParameterFile(Dir + '/U').content['internalField'])
        temperature = np.array(ParsedParameterFile(Dir + '/T').content['internalField'])
        # A 0-d U means a "uniform" entry; both fields are then expanded per cell.
        if velocity.ndim == 0:
            velocity = self.initial_to_array(velocity)
            temperature = self.initial_to_array(temperature)
        # Convert to degrees Celsius and cast the result to float64.
        if celsius:
            temperature = self.Celsius_(temperature).astype(np.float64)
        planar_velocity = np.delete(velocity, 2, axis=1)
        temperature_column = np.reshape(temperature, [-1,1], order='F')
        # Per-cell rows of (Ux, Uy, T), padded with zero rows at insert_list.
        per_cell = np.concatenate([planar_velocity, temperature_column],axis=1)
        per_cell = np.insert(per_cell, self.insert_list, [0,0,0], axis=0)
        channels = [per_cell[:,index].reshape(self.xCells,-1) for index in range(3)]
        return np.array(channels)
    
    def make_action(self):
        '''actionの設定'''
        Action = np.empty((0,3),float)
        for i in range(len(self.action_SPEED)):
            for j in range(len(self.action_DIRECTION)):
                for k in range(len(self.action_TEMPERTURE)):
                    Ux = self.action_SPEED[i]*np.cos(self.action_DIRECTION[j])
                    Uy = self.action_SPEED[i]*np.sin(self.action_DIRECTION[j])
                    Act = np.array([[Ux,Uy,self.action_TEMPERTURE[k]]])
                    Action = np.append(Action,Act,axis=0)
                    
        return Action
    
    def getParsed(self,time_step):
        '''Return ``[T, U]`` as ParsedParameterFile objects for one time step.'''
        step_dir = self.CASE.name + '/' + str(time_step)
        return [ParsedParameterFile(step_dir + '/T'),
                ParsedParameterFile(step_dir + '/U')]
    
    
    def getParsedList(self,first_step, last_step, write_step,):
        '''Return a list of ``[T, U]`` ParsedParameterFile pairs.

        One pair is read for every step in
        ``range(first_step, last_step, write_step)``.
        '''
        pairs = []
        for step in range(first_step, last_step, write_step):
            step_dir = self.CASE.name + '/' + str(step)
            temperature = ParsedParameterFile(step_dir + '/T')
            velocity = ParsedParameterFile(step_dir + '/U')
            pairs.append([temperature, velocity])
        return pairs
    
    # 後にcythonで書き直す予定
    def calc_PMV(self, TA=20,VA=0.3,TR=20,RH=50,AL=1,CLO=1):
        """Compute PMV and PPD for one point.

        Defaults are listed below; passing at least TA, VA, TR and RH is
        recommended.
        TA = 20  #  air temperature [deg C]
        VA = 0.3  # air speed [m/s]
        TR = 20  # MRT [deg C]
        RH = 50  # relative humidity [%]
        AL = 1  # activity level [met]
        CLO = 1 # clothing [clo]

        Returns the tuple ``(PMV, PPD)``.  When the clothing surface
        temperature iteration does not converge, or when ``abs(PMV) > 3``, the
        result is the fixed pair ``(3.0, 100)``.
        """
        #***************************************************
        # External work is taken as W = 0 [W/m^2].
        #***************************************************
        # Preparation
        M = AL * 58.15
        LCL = CLO
        W = 0
        # Vapour pressure term derived from TA and RH.
        PPK = 673.4 - 1.8 * TA
        PPA = 3.2437814 + 0.00326014 * PPK + 2.00658 * 1E-9 * PPK * PPK * PPK
        PPB = (1165.09 - PPK) * (1 + 0.00121547 * PPK)
        PA = RH / 100 * 22105.8416 / np.exp(2.302585 * PPK * PPA / PPB) * 1000
        EPS = 1E-5
        MW = M - W
        # FCL = ratio of clothed surface area to nude surface area
        if LCL > 0.5:
            FCL = 1.05 + 0.1 * LCL
        else:
            FCL = 1 + 0.2 * LCL
        # Initial guess of the clothing surface temperature TCL
        TCL = TA
        TCLA = TCL
        NOI = 1
        # Iterate for the clothing surface temperature
        while True:
            TCLA = 0.8 * TCLA + 0.2 * TCL
            HC = 12.1 * np.sqrt(VA)
            # Use the larger of the two heat transfer coefficients.
            if 2.38 * np.sqrt(np.sqrt(abs(TCL - TA))) > HC:
                HC = 2.38 * np.sqrt(np.sqrt(abs(TCL - TA)))
            TCL = 35.7 - 0.028 * MW - 0.155 * LCL * (3.96 * 1E-8 * FCL * ((TCLA + 273) ** 4 - (TR + 273) ** 4) + FCL * HC * (TCLA - TA))
            NOI = NOI + 1
            if NOI > 150:
                # No convergence: report the same fixed pair as the out-of-range
                # case below.  (The assignment used to target a misspelled name
                # "PMB", which left PMV unbound and raised on return.)
                PMV = 3.0
                PPD = 100
                return (PMV,PPD)
            if not abs(TCLA - TCL) > EPS:
                break
        # PMV calculation
        PM1 = 3.96 * 1E-8 * FCL * ((TCL + 273) ** 4 - (TA + 273) ** 4)
        PM2 = FCL * HC * (TCL - TA)
        PM3 = 0.303 * np.exp(-0.036 * M) + 0.028
        if MW > 58.15:
            PM4 = 0.42 * (MW - 58.15)
        else:
            PM4 = 0
        PMV = PM3 * (MW - 3.05 * 0.001 * (5733 - 6.99 * MW - PA) - PM4 - 1.7 * 1E-5 * M * (5867 - PA) - 0.0014 * M * (34 - TA) - PM1 - PM2)
        if abs(PMV) > 3:
            # Out of range: clamp to the fixed pair (the sign of PMV is not kept).
            PMV = 3.0
            PPD = 100
            return (PMV,PPD)

        PPD = 100 - 95 * np.exp(-0.0335 * PMV ** 4 - 0.2179 * PMV ** 2)

        return (PMV,PPD)
    
    def calc_MRT(self, T_Parsed):
        '''Compute the MRT as the average of the collected boundary temperatures.

        T_Parsed : mapping with ``internalField`` and ``boundaryField`` entries.

        When ``internalField`` is 0-d (time step 0), boundaries of type
        zeroGradient, empty and fixedValue are skipped and every other
        ``value`` is parsed as a uniform entry.  Otherwise zeroGradient and
        empty are skipped, fixedValue values are parsed as uniform entries,
        and any other ``value`` is used directly (parsed only when it is 0-d).
        '''
        no_value_types = ('zeroGradient', 'empty')
        at_initial_step = np.array(T_Parsed['internalField']).ndim==0  # time_step=0
        wall_temps = np.array([])
        for patch in list(T_Parsed['boundaryField']):
            entry = T_Parsed['boundaryField'][patch]
            patch_type = entry['type']
            if at_initial_step:
                if patch_type in no_value_types or patch_type=='fixedValue':
                    patch_temps = np.array([])
                else:
                    patch_temps = self.initial_to_float(np.array(entry['value']))
            elif patch_type=='fixedValue':
                patch_temps = self.initial_to_float(np.array(entry['value']))
            elif patch_type in no_value_types:
                patch_temps = np.array([])
            else:
                patch_temps = np.array(entry['value'])
                if patch_temps.ndim==0:
                    patch_temps = self.initial_to_float(patch_temps)
            wall_temps = np.append(wall_temps, patch_temps)
        return np.average(wall_temps)
    
    def Celsius(self, T):
        '''Return ``T`` minus 273.15 (kelvin to degrees Celsius).'''
        return T - 273.15
    
    def Celsius_(self, T):
        '''np.arrayの配列をセルシウス℃に変換'''
        if np.array(T).size==1:
            return self.Celsius(T)
        else:
            Celsiuss = np.frompyfunc(self.Celsius,1,1)  # リストに適用可にする
            return Celsiuss(T)
        
    def UScalar(self, U):
        '''Convert velocity U to a speed using its first two components.

        An input with more than three elements is treated as rows of vectors and
        yields one speed per row; otherwise a one-element array is returned.
        '''
        if np.array(U).size > 3:
            first, second = U[:,0], U[:,1]
            return np.sqrt(first**2 + second**2)
        speed = np.sqrt(U[0]**2 + U[1]**2)
        return np.array([speed])
        
    def calc_PMV_all(self, TU_Parsed,RH=50,AL=1,CLO=1):
        '''PMVを一つのtime_stepで全点計算
        TU_Parsed : TとUのParsedParameterFileをリストにしたもの
        全ての点のPMVとPPVの値を返す
        time=0でも、すべてのセルの値を返す。'''
        T_Parsed,U_Parsed = TU_Parsed
        T = np.array(T_Parsed['internalField'])
        U = np.array(U_Parsed['internalField'])
        # time_step==0の場合
        if T.ndim==0 or U.ndim==0:
            T = self.initial_to_float(T)
            U = self.initial_to_float(U)
            # Uを速さに変換
            Us = self.UScalar(U)
            MRT = self.calc_MRT(T_Parsed)
            # TとMRTをセルシウス温度に変換
            Tc = self.Celsius_(T)
            MRTc = self.Celsius_(MRT)
            pmv,ppd = self.calc_PMV(TA=Tc,VA=Us,TR=MRTc,RH=RH,AL=AL,CLO=CLO)
            PMV = np.tile(pmv, self.nCells)
            PPD = np.tile(ppd, self.nCells)
        else:   
            # Uを速さに変換
            Us = self.UScalar(U)
            MRT = self.calc_MRT(T_Parsed)
            # TとMRTをセルシウス温度に変換
            Tc = list(self.Celsius_(T))
            MRTc = self.Celsius_(MRT)
            
            length = len(T)
            # ループを早くするため、外に出す。
            PMV = []
            PPD = []
            PMVappend = PMV.append
            PPDappend = PPD.append
            for i in range(length):
                pmv,ppd = self.calc_PMV(TA=Tc[i],VA=Us[i],TR=MRTc,RH=RH,AL=AL,CLO=CLO)
                PMVappend(pmv)
                PPDappend(ppd)
            PMV = np.array(PMV)
            PPD = np.array(PPD)
        return [PMV,PPD]
    
    def calc_PMV_error(self, TU_Parsed,RH=50,AL=1,CLO=1):
        """PMVの全点の2条誤差の合計を計算
        入力はcalc_PMV_allと同じ。返すものだけが違う。
        PMVは、0との2乗誤差、PPDは0との、根平均2乗誤差を返す。
        """
        PMV, PPD = self.calc_PMV_all(TU_Parsed, RH=RH,AL=AL,CLO=CLO)
        PMV_mae = ((PMV - 0)**2).mean()
        PPD_rmse = np.sqrt( ((PPD - 0)**2).mean())
        return PMV_mae, PPD_rmse
    
    def header(self, time_step, filename):
        '''headerファイルを作成'''
        header = """/*--------------------------------*- C++ -*----------------------------------*\
=========                 |
  \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox
   \\    /   O peration     | Website:  https://openfoam.org
    \\  /    A nd           | Version:  6
     \\/     M anipulation  |
\*---------------------------------------------------------------------------*/
FoamFile
{{
    version     2.0;
    format      ascii;
    class       volScalarField;
    location    "{}";
    object      {};
}}
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
""".format(time_step, filename)
        return header
    
    def internal(self, list_internal):
        '''internalFieldの値の作成'''
        if len(list_internal)==1:
            internal = """
internalField   uniform {};""".format(list_internal[0])
        else:
            str_= np.frompyfunc(str,1,1)
            str_internal = '\n'.join(str_(list_internal))
            internal = """
internalField   nonuniform List<scalar> 
{}
(
{}
)
;
""".format(self.nCells, str_internal)
        return internal
    
    def makePMVFile(self,time_step):
        '''PMVとPPDファイルを書き込む'''
        
        path_pmv = self.CASE.name + '/' + str(time_step) + '/PMV' # 書き込むパス
        path_ppd = self.CASE.name + '/' + str(time_step) + '/PPD'
        
        demensions = """
dimensions      [0 0 0 0 0 0 0];
"""
        
        boundary = """
boundaryField
{
    ".*"
    {
        type            zeroGradient;
    }
}


// ************************************************************************* //
"""
        # header, dimensions, internal, boundaryの順に書き込む
        f = open(path_pmv, 'w') # ファイルを開く(該当ファイルがなければ新規作成)
        g = open(path_ppd, 'w')
        f.write(self.header(time_step,"PMV")) # headerを記載する
        g.write(self.header(time_step,"PPD"))
        f.write(demensions) # dimensionsを記載する
        g.write(demensions)
        # internalFieldの計算
        TU_Parsed = self.getParsed(time_step)
        PMV,PPD = self.calc_PMV_all(TU_Parsed)
        internal_PMV = self.internal(PMV)
        internal_PPD = self.internal(PPD)
        f.write(internal_PMV)  
        g.write(internal_PPD)
        f.write(boundary)
        g.write(boundary)
        f.close() 
        g.close()

        
    def makePMVList(self,first_step, last_step, write_step):
        '''Write the PMV files for every step in ``range(first_step, last_step, write_step)``.'''
        for time_step in range(first_step, last_step, write_step):
            self.makePMVFile(time_step)
            
        
    def meshNumberFile(self,time_step):
        '''メッシュの並びを確認する'''
        path_mesh = self.CASE.name + '/' + str(time_step) + '/Meshnumber' # 書き込むパス


        demensions = """
dimensions      [0 0 0 0 0 0 0];
"""
        boundary = """
boundaryField
{
    ".*"
    {
        type            zeroGradient;
    }
}


// ************************************************************************* //
"""
        f = open(path_mesh, 'w') # ファイルを開く(該当ファイルがなければ新規作成)
        f.write(self.header(time_step,"PMV")) # headerを記載する
        f.write(demensions) # dimensionsを記載する
        mesh_list = [x for x in range(1,self.nCells+1)]
        internal_mesh = self.internal(mesh_list)
        f.write(internal_mesh)  
        f.write(boundary)
        f.close() 
            
    def calc_ADPI(self,TU_Parsed,occupied_zone_cell):
        '''Compute the ADPI over the first ``occupied_zone_cell`` cells.

        Args:
            TU_Parsed: pair ``(T_Parsed, U_Parsed)``; each must provide an
                ``'internalField'`` entry.
            occupied_zone_cell: number of leading cells counted as the
                occupied zone.

        Returns:
            ``(ADPI, theta)`` where ``ADPI`` is the percentage of occupied-zone
            cells with ``-1.5 < theta < 1`` and speed below 0.35, and ``theta``
            is the effective draft temperature of every cell.
        '''
        T_Parsed,U_Parsed = TU_Parsed
        temperature = np.array(T_Parsed['internalField'])
        velocity = np.array(U_Parsed['internalField'])
        # A 0-dimensional array means the field was not a per-cell list
        # (the time_step == 0 case); both fields are then converted.
        if 0 in (temperature.ndim, velocity.ndim):
            temperature = self.initial_to_float(temperature)
            velocity = self.initial_to_float(velocity)

        mean_temperature = np.average(temperature)  # mean over all cells
        speed = self.UScalar(velocity)
        # Effective draft temperature.
        theta = (temperature - mean_temperature) - 8.0*(speed - 0.15)

        theta_ok = (theta > -1.5) & (theta < 1)
        speed_ok = speed < 0.35
        # A cell counts only when both criteria hold; restrict to the zone.
        zone_ok = np.logical_and(theta_ok, speed_ok)[:occupied_zone_cell]
        ADPI = np.sum(zone_ok)/zone_ok.size*100

        return (ADPI, theta)
    
    def calc_EUC(self,T_Parsed, occupied_zone_cell,last_cell):
        '''Compute EUC = (Toz - T0) / (Tiz - T0) * 100.

        ``T0`` is the inlet boundary temperature, ``Tiz`` the mean of cells
        ``[:occupied_zone_cell]`` and ``Toz`` the mean of cells
        ``[occupied_zone_cell:last_cell]``. When the internal field is a
        single (0-dimensional) value, that value is used for both means.
        '''
        field = np.array(T_Parsed['internalField'])
        inlet_value = T_Parsed['boundaryField']['inlet']['value']
        T0 = self.initial_to_float(inlet_value)[0]  # supply-air temperature

        if field.ndim != 0:
            Toz = np.average(field[occupied_zone_cell:last_cell])  # outside the zone
            Tiz = np.average(field[:occupied_zone_cell])  # inside the zone
        else:
            Toz = Tiz = self.initial_to_float(field)[0]
        return (Toz-T0) / (Tiz-T0) * 100
        
    def getPMVList(self, first_step, last_step, write_step):
        '''Collect the mean PMV and mean PPD for each time step in a range.

        Returns:
            ``[PMV_list, PPD_list]`` with one averaged value per step of
            ``range(first_step, last_step, write_step)``.
        '''
        mean_pmv = []
        mean_ppd = []
        for time_step in range(first_step, last_step, write_step):
            PMV, PPD = self.calc_PMV_all(self.getParsed(time_step))
            mean_pmv.append(np.average(np.array(PMV)))
            mean_ppd.append(np.average(np.array(PPD)))
        return [mean_pmv, mean_ppd]
    
    
    def getADPIList(self, first_step, last_step, write_step,occupied_zone_cell=342):
        '''Collect the ADPI value for each time step in a range.

        Only the first element of ``calc_ADPI``'s result is kept; the
        per-cell theta it also returns is discarded.
        '''
        return [
            self.calc_ADPI(self.getParsed(time_step), occupied_zone_cell)[0]
            for time_step in range(first_step, last_step, write_step)
        ]
    
    def getEUCList(self, first_step, last_step, write_step,
                    occupied_zone_cell=342, last_cell=100000):
        '''Compute the EUC value for each time step in a range.

        Only the temperature part of ``getParsed``'s result is used.
        '''
        # Lazy generator: each step is parsed right before its EUC is computed.
        temperature_fields = (
            self.getParsed(time_step)[0]
            for time_step in range(first_step, last_step, write_step)
        )
        return [
            self.calc_EUC(T_Parsed, occupied_zone_cell, last_cell)
            for T_Parsed in temperature_fields
        ]
    
    def getTUList(self, first_step, last_step, write_step):
        '''Collect mean temperature, mean speed and mean MRT per time step.

        Returns:
            ``[T_list, U_list, MRT_list]``. Temperature and MRT are passed
            through ``self.Celsius`` after averaging; speed is the average of
            ``self.UScalar`` applied to the velocity field.
        '''
        T_list, U_list, MRT_list = [], [], []
        for time_step in range(first_step, last_step, write_step):
            T_Parsed, U_Parsed = self.getParsed(time_step)
            temperature = np.array(T_Parsed['internalField'])
            velocity = np.array(U_Parsed['internalField'])
            # 0-dimensional field: the time_step == 0 case.
            if 0 in (temperature.ndim, velocity.ndim):
                temperature = self.initial_to_float(temperature)
                velocity = self.initial_to_float(velocity)

            mean_T = np.average(temperature)
            mean_speed = np.average(np.array(self.UScalar(velocity)))
            mean_MRT = np.average(np.array(self.calc_MRT(T_Parsed)))

            # Temperature-like quantities are converted with self.Celsius.
            T_celsius = self.Celsius(mean_T)
            MRT_celsius = self.Celsius(mean_MRT)
            T_list.append(T_celsius)
            U_list.append(mean_speed)
            MRT_list.append(MRT_celsius)
        return [T_list,U_list,MRT_list]
        
        
        
    def change_control(self,control):
        '''Apply one of four mesh / time-step presets.

        For ``control`` in 1..4 the third entry of ``blockMeshDict['blocks']``
        is replaced, blockMeshDict is written, and ``controlDict['deltaT']`` is
        set in memory (controlDict is not written here). Any other value
        leaves everything untouched.
        '''
        # (preset id, vector written to blocks[2], deltaT)
        presets = (
            (1, (20, 10, 1), 0.02),
            (2, (40, 20, 1), 0.02),
            (3, (20, 10, 1), 0.01),
            (4, (40, 20, 1), 0.01),
        )
        for preset_id, cells, delta_t in presets:
            if control == preset_id:
                self.blockMeshDict['blocks'][2] = Vector(*cells)
                self.blockMeshDict.writeFile()
                self.controlDict['deltaT'] = delta_t
            
    def write_interval(self, writeInterval):
        '''Set ``'writeInterval'`` on ``self.controlDict`` (in memory only).'''
        control_dict = self.controlDict
        control_dict['writeInterval'] = writeInterval
        
        
    def reset(self):
        '''Reset the environment and return the initial observation.

        Rewinds the time bookkeeping, rewrites startTime/endTime in the case's
        controlDict, re-runs the case's Makemesh script, writes a random
        uniform initial temperature, and rebuilds the action table and the
        observation from the initial directory.
        '''
        
        # Reset the time bookkeeping: present_time counts time advanced by
        # step(); startTime/endTime mirror what is written to controlDict.
        self.present_time = 0  
        self.startTime = 0
        self.endTime = copy(self.stride)
        
        # Write the rewound start/end time into the controlDict file.
        clDict = ParsedParameterFile(self.CASE.controlDict())
        clDict['startTime'] = self.startTime
        clDict['endTime'] = self.endTime
        clDict.writeFile()
        #self.startTime = clDict['startTime']
        #self.endTime = clDict['endTime']
        
        #os.system('./Allclean')
        # Run the case's Makemesh script (its exit status is ignored).
        os.system(self.CASE.name + '/Makemesh')
        
        # Set a random uniform initial temperature field.
        T_initial = ParsedParameterFile(self.CASE.initialDir() + '/T')
        # np.random.randint excludes the upper bound, so this draws an integer
        # in 299..307 (26+273 .. 34+273).
        T_rand = np.random.randint(26+273,35+273)
        T_initial['internalField'].setUniform(T_rand)
        T_initial.writeFile()
        
        
        # Rebuild the action table and the observation of the initial state.
        self.action_space= self.make_action()
        self.observation = self.make_observation(self.CASE.initialDir())
        return self.observation
    
    def step_old(self, action):
        '''Advance the simulation by one stride (variant without a reward).

        Args:
            action: index into ``self.action_space``.

        Returns:
            ``(observation, done, runOK)``. Once ``present_time`` has reached
            ``self.end`` nothing is run: ``done`` is True, ``runOK`` is the
            string ``'end'`` and the previous observation is returned.
        '''
        #clDict = ParsedParameterFile(self.CASE.controlDict())      
        if self.present_time >= self.end:
            done = True
            runOK = 'end'
        else:
            done = False
            
            # Apply the chosen action to the inlet boundary conditions of the
            # latest time directory: act[0], act[1] become the x/y components
            # of the inlet velocity, act[2] the inlet temperature.
            U_latest = ParsedParameterFile(self.CASE.latestDir() + '/U')
            T_latest = ParsedParameterFile(self.CASE.latestDir() + '/T')
            self.act = self.action_space[action]
            U_latest['boundaryField']['inlet']['value'].setUniform(Vector(self.act[0],self.act[1],0))
            U_latest.writeFile()
            T_latest['boundaryField']['inlet']['value'].setUniform(self.act[2])
            T_latest.writeFile()
            
            # Run the OpenFOAM solver on this case.
            args=shlex.split("buoyantPimpleFoam -case " + self.CASE.name)
            buoyant=BasicRunner(args,silent=True)
            self.summary=buoyant.start()
            runOK = buoyant.runOK()
            
            #os.system("buoyantBoussinesqPimpleFoam")
            
            # Shift the controlDict time window forward by one stride so the
            # next solver run continues from where this one stopped.
            self.present_time += self.stride
            clDict = ParsedParameterFile(self.CASE.controlDict())
            self.startTime += self.stride
            self.endTime += self.stride
            clDict['startTime'] = self.startTime
            clDict['endTime'] = self.endTime
            clDict.writeFile()
            
            # Read the values back from the dictionary that was just written.
            self.startTime = clDict['startTime']
            self.endTime = clDict['endTime']
            
            # Observation of the newest time directory.
            self.observation = self.make_observation(self.CASE.latestDir())
            
        return (self.observation, done, runOK)
    
    
    def step(self, action, reward='PMV'):
        '''Advance the simulation by one stride and compute the reward.

        Args:
            action: index into ``self.action_space``.
            reward: intended to select the reward type; it is not read in this
                method, the reward is always derived from the PMV error.

        Returns:
            ``(observation, reward, done, runOK)``. Once ``present_time`` has
            reached ``self.end`` nothing is run: ``done`` is True, ``runOK`` is
            the string ``'end'`` and the observation and reward of the previous
            step are returned unchanged (so ``self.reward`` must already exist).
        '''
        #clDict = ParsedParameterFile(self.CASE.controlDict())      
        if self.present_time >= self.end:
            done = True
            runOK = 'end'
            # reward and observation keep the values from the previous step.
        else:
            done = False
            
            # Apply the chosen action to the inlet boundary conditions of the
            # latest time directory: act[0], act[1] become the x/y components
            # of the inlet velocity, act[2] the inlet temperature.
            U_latest = ParsedParameterFile(self.CASE.latestDir() + '/U')
            T_latest = ParsedParameterFile(self.CASE.latestDir() + '/T')
            self.act = self.action_space[action]
            U_latest['boundaryField']['inlet']['value'].setUniform(Vector(self.act[0],self.act[1],0))
            U_latest.writeFile()
            T_latest['boundaryField']['inlet']['value'].setUniform(self.act[2])
            T_latest.writeFile()
            
            # Run the OpenFOAM solver on this case.
            args=shlex.split("buoyantPimpleFoam -case " + self.CASE.name)
            buoyant=BasicRunner(args,silent=True)
            self.summary=buoyant.start()
            runOK = buoyant.runOK()
            
            #os.system("buoyantBoussinesqPimpleFoam")
            
            # Shift the controlDict time window forward by one stride so the
            # next solver run continues from where this one stopped.
            self.present_time += self.stride
            clDict = ParsedParameterFile(self.CASE.controlDict())
            self.startTime += self.stride
            self.endTime += self.stride
            clDict['startTime'] = self.startTime
            clDict['endTime'] = self.endTime
            clDict.writeFile()
            
            # Read the values back from the dictionary that was just written.
            self.startTime = clDict['startTime']
            self.endTime = clDict['endTime']
            
            # Observation of the newest time directory.
            self.observation = self.make_observation(self.CASE.latestDir())
            
            # Reward: computed from the freshly parsed T and U files rather
            # than from the observation, because make_observation pads the
            # data with zeros.
            T_new = ParsedParameterFile(self.CASE.latestDir() + '/T')
            U_new = ParsedParameterFile(self.CASE.latestDir() + '/U')
            TU_Parsed = [T_new,U_new]
            PMV_mae, PPD_rmse = self.calc_PMV_error(TU_Parsed, RH=50,AL=1,CLO=1)
            # Reward is 1 minus the PMV error: the deviation counts negatively
            # and the reward approaches +1 as the error approaches zero.
            self.reward = -PMV_mae + 1
            
        
        return (self.observation, self.reward, done, runOK)
        

In [126]:
# aircondを並列でたくさんつくるためのクラス

# ケースの作成
def makecase(NUM_PROCESSES,stride=500, end=3000, xCells=40,
                         insert_list = [15,15,15,15,33,33,33,51,69,69,69,87,105,105,105,142,142,142,342,342,380,380]):
    """Create NUM_PROCESSES case directories and one Aircond per case.

    Runs the ``./makecase`` script with the process count, then wraps
    ``./Case/case0`` .. ``./Case/case{N-1}`` in Aircond environments that all
    receive the same stride, end, xCells and insert_list.
    """
    os.system("./makecase {}".format(NUM_PROCESSES))
    return [
        Aircond(SolutionDirectory("./Case/case{}".format(index)),
                stride=stride, end=end, xCells=xCells, insert_list=insert_list)
        for index in range(NUM_PROCESSES)
    ]

In [128]:
# Constants

#ENV_NAME = 'BreakoutNoFrameskip-v4' 
# Use BreakoutNoFrameskip-v4 rather than Breakout-v0:
# v0 skips a random 2-4 frames automatically, whereas this version does no frame skipping.
# Reference: https://becominghuman.ai/lets-build-an-atari-ai-part-1-dqn-df57e8ff3b26
# https://github.com/openai/gym/blob/5cb12296274020db9bb6378ce54276b31e7002da/gym/envs/__init__.py#L371
    
NUM_SKIP_FRAME = 4 # number of frames to skip
NUM_STACK_FRAME = 1  # number of consecutive frames kept as one state
NOOP_MAX = 30  #  upper bound of the random number of no-operation frames inserted at reset
NUM_PROCESSES = 2 #  number of environments run side by side
NUM_ADVANCED_STEP = 5  # number of steps to advance before the return is computed
GAMMA = 0.99  # discount factor

TOTAL_FRAMES=10e6  #  total number of frames used for training
NUM_UPDATES = int(TOTAL_FRAMES / NUM_ADVANCED_STEP / NUM_PROCESSES)  # total number of network updates
# With the values above NUM_UPDATES evaluates to 1,000,000
# (the 125,000 figure originally noted here corresponds to NUM_PROCESSES = 16).


In [129]:
# Rebind NUM_UPDATES to a small fixed count, replacing any previously computed value.
NUM_UPDATES = 10

In [130]:
# Constants used when computing the A2C loss
value_loss_coef = 0.5
entropy_coef = 0.01
max_grad_norm = 0.5

# Settings for the RMSprop optimizer
lr = 7e-4
eps = 1e-5
alpha = 0.99


In [131]:
# Select the compute device: CUDA when available, otherwise the CPU
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print(device)


cpu


In [132]:
# A2Cのディープ・ニューラルネットワークの構築


def init(module, gain):
    '''Initialise a layer in place and return it.

    The weight receives an orthogonal initialisation scaled by ``gain``;
    the bias is filled with zeros.
    '''
    weight = module.weight.data
    nn.init.orthogonal_(weight, gain=gain)
    bias = module.bias.data
    nn.init.constant_(bias, 0)
    return module


class Flatten(nn.Module):
    '''Layer that reshapes a batch of feature maps to (batch, features).'''

    def forward(self, x):
        batch_size = x.size(0)
        return x.view(batch_size, -1)


class Net(nn.Module):
    '''Actor-critic network: a shared convolutional trunk with two heads.

    The trunk maps a 3-channel input to a 512-dimensional feature vector.
    The critic head outputs one state value and the actor head outputs
    ``n_out`` action logits.
    '''

    def __init__(self, n_out):
        super().__init__()

        # Gain used for every layer that is followed by a ReLU.
        relu_gain = nn.init.calculate_gain('relu')

        # Output size per conv: (input - kernel + 2*padding) / stride + 1.
        # The Linear input of 64*2*16 matches a 40x12 spatial input:
        # 40x12 -> 18x4 -> 17x3 -> 16x2.
        self.conv = nn.Sequential(
            init(nn.Conv2d(3, 32, kernel_size=5, stride=2), gain=relu_gain),
            nn.ReLU(),
            init(nn.Conv2d(32, 64, kernel_size=2, stride=1), gain=relu_gain),
            nn.ReLU(),
            init(nn.Conv2d(64, 64, kernel_size=2, stride=1), gain=relu_gain),
            nn.ReLU(),
            Flatten(),  # to (batch, 64*2*16)
            init(nn.Linear(64 * 2 * 16, 512), gain=relu_gain),
            nn.ReLU()
        )

        # Critic head: a single state value.
        self.critic = init(nn.Linear(512, 1), gain=1.0)

        # Actor head: one logit per action, initialised with a small gain.
        self.actor = init(nn.Linear(512, n_out), gain=0.01)

        # Put the network in training mode.
        self.train()

    def forward(self, x):
        '''Return ``(critic_output, actor_output)`` for a batch of states.'''
        # The input is used as-is; no normalisation is applied.
        features = self.conv(x)
        state_value = self.critic(features)
        action_logits = self.actor(features)

        return state_value, action_logits

    def act(self, x):
        '''Sample one action per state from the softmax policy.'''
        _, action_logits = self(x)
        policy = F.softmax(action_logits, dim=1)  # over the action dimension
        return policy.multinomial(num_samples=1)

    def get_value(self, x):
        '''Return the critic's state value for each state in ``x``.'''
        state_value, _ = self(x)
        return state_value

    def evaluate_actions(self, x, actions):
        '''Return state values, log-probabilities of ``actions`` and the mean entropy.'''
        state_value, action_logits = self(x)

        log_policy = F.log_softmax(action_logits, dim=1)
        # Log-probability of the action that was actually taken.
        chosen_log_probs = log_policy.gather(1, actions)

        policy = F.softmax(action_logits, dim=1)
        # Entropy per sample, averaged over the batch.
        mean_entropy = -(log_policy * policy).sum(-1).mean()

        return state_value, chosen_log_probs, mean_entropy


In [63]:
def resets(Envs):
    """Reset every environment and return the observations as one array."""
    return np.array([env.reset() for env in Envs])

In [64]:
def steps(Envs, action):
    """Advance every environment by one step with its own action.

    ``action[i]`` is passed to ``Envs[i].step``. Returns
    ``(obs, reward, done, runOK)`` where ``obs`` is a numpy array and the
    other three are lists with one entry per environment.
    """
    obs, reward, done, runOK = [], [], [], []
    for index, env in enumerate(Envs):
        obs_, reward_, done_, runOK_ = env.step(action[index])
        obs.append(obs_)
        reward.append(reward_)
        done.append(done_)
        runOK.append(runOK_)
    return np.array(obs), reward, done, runOK
    

In [65]:
def make_random_actions(Envs, max_execution=3):
    """Draw a random warm-up action plan per environment.

    Each environment gets between 0 and ``max_execution`` random action
    indices (drawn from the first environment's ``action_space`` length).
    Shorter plans are padded with ``1j`` so the result is a rectangular
    ``complex128`` array: entries with a zero imaginary part are real
    actions, entries equal to ``1j`` are padding.
    """
    n_actions = Envs[0].action_space.shape[0]
    # The step count is drawn first, then that many action indices.
    plans = [
        [random.randint(0, n_actions-1)
         for _ in range(random.randint(0, max_execution))]
        for _ in Envs
    ]

    longest = max(map(len, plans))
    padded = [plan + [1j]*(longest - len(plan)) for plan in plans]
    return np.array(padded).astype(np.complex128)

In [66]:
def random_steps(Envs, random_actions, step_reset=True):
    """Advance each environment by its own number of warm-up steps.

    Args:
        Envs: list of environments.
        random_actions: 2-D complex array, one row per environment to advance
            and one column per step. Entries with a zero imaginary part are
            actions to execute; entries with a non-zero imaginary part (the
            ``1j`` padding) are skipped. Only the first
            ``random_actions.shape[0]`` environments are touched, so trailing
            environments can be left out on purpose.
        step_reset: when True, ``present_time`` of every advanced environment
            is set back to 0 afterwards.

    Returns:
        ``(obs, reward, done, runOK)`` with one entry per row:
        obs    - observation of the last executed step, or the environment's
                 current ``observation`` if no step was executed (numpy array);
        reward - reward of the last executed step, or the environment's
                 current ``reward`` attribute if no step was executed;
        done   - True if any executed step reported done;
        runOK  - False if any executed step reported a falsy runOK.
    """
    obs, reward, done, runOK = [], [], [], []
    # Use the row count rather than len(Envs): see the Args note above.
    for i in range(random_actions.shape[0]):
        env = Envs[i]
        # step() returns (observation, reward, done, runOK); the original
        # code unpacked reward and done in swapped order.
        results = [
            env.step(int(planned.real))
            for planned in random_actions[i]
            if planned.imag == 0  # padding entries carry 1j
        ]

        if results:
            last_obs, last_reward = results[-1][0], results[-1][1]
        else:
            # Nothing was executed: fall back to the environment's state.
            # This requires the environment to already have these attributes.
            last_obs, last_reward = env.observation, env.reward
        obs.append(last_obs)
        reward.append(last_reward)
        done.append(any(result[2] for result in results))
        runOK.append(all(result[3] for result in results))
    obs = np.array(obs)

    if step_reset:
        for i in range(random_actions.shape[0]):
            Envs[i].present_time=0

    return obs, reward, done, runOK

In [124]:
# Fluid (CFD) version of the A2C training run

# Seed the torch random number generators
seed_num = 1
torch.manual_seed(seed_num)
if use_cuda:
    torch.cuda.manual_seed(seed_num)

# Limit torch to one thread. The original passed seed_num here, which only
# gave the intended value because the seed happens to be 1.
torch.set_num_threads(1)
# Build the environments
Envs = makecase(NUM_PROCESSES, stride=100)  # stride is planned to be around 300 for real runs

# Create the Brain shared by all agents
n_out = Envs[0].action_space.shape[0]  # number of discrete actions
actor_critic = Net(n_out).to(device)  # move to the selected device
global_brain = Brain(actor_critic)

# Storage
obs_shape = Envs[0].observation_space.shape  # (3, 40, 12)
#obs_shape = (obs_shape[0] * NUM_STACK_FRAME,
#             *obs_shape[1:])  # (4, 84, 84)
# Only a single frame is used as the state, so current_obs holds the
# observation as-is (no frame stacking).

# current_obs has shape (NUM_PROCESSES, *obs_shape)
current_obs = torch.zeros(NUM_PROCESSES, *obs_shape).to(device)
rollouts = RolloutStorage(
    NUM_ADVANCED_STEP, NUM_PROCESSES, obs_shape)  # rollout buffer
episode_rewards = torch.zeros([NUM_PROCESSES, 1])  # reward accumulated in the current episode
final_rewards = torch.zeros([NUM_PROCESSES, 1])  # total reward of the last finished episode

# Initial state
obs = resets(Envs)
obs = torch.from_numpy(obs).float()  # (NUM_PROCESSES, *obs_shape)
current_obs = obs.to(device)  # latest observation

# Store the current state as the first observation of the rollout buffer
rollouts.observations[0].copy_(current_obs)

# Main loop
# NOTE(review): finished environments are not reset inside this loop; once an
# environment's present_time reaches its end, Aircond.step keeps returning
# done=True with the previous observation and reward.
for j in tqdm(range(NUM_UPDATES)):
    # Advance NUM_ADVANCED_STEP steps before each update
    for step in range(NUM_ADVANCED_STEP):

        # Choose the actions
        with torch.no_grad():
            action = actor_critic.act(rollouts.observations[step])

        cpu_actions = action.squeeze(1).cpu().numpy()  # tensor -> NumPy

        # One step in every environment; obs has shape (NUM_PROCESSES, *obs_shape)
        obs, reward, done,runOK = steps(Envs, cpu_actions)

        # Convert the rewards to a tensor of shape (NUM_PROCESSES, 1) and add
        # them to the running episode totals
        reward = np.expand_dims(np.stack(reward), 1)
        reward = torch.from_numpy(reward).float()
        episode_rewards += reward

        # Mask is 0 for environments that are done or whose solver run
        # failed, 1 for environments that continue
        masks = torch.FloatTensor(
            [[0.0] if done_ or not runOK_ else [1.0] for done_, runOK_ in zip(done,runOK)])
        # Update the last-episode totals: unchanged while running (x1),
        # cleared when finished (x0) ...
        final_rewards *= masks
        # ... and then set to the episode total for finished environments
        final_rewards += (1 - masks) * episode_rewards

        # Reset the running totals of finished environments
        episode_rewards *= masks

        # Move the masks to the selected device
        masks = masks.to(device)

        # Zero the current state of finished environments;
        # masks is expanded from (N, 1) to (N, 1, 1, 1) for broadcasting
        current_obs *= masks.unsqueeze(2).unsqueeze(2)

        # No frame stacking: the newest observation replaces current_obs
        obs = torch.from_numpy(obs).float()
        current_obs = obs.to(device)

        # Insert this step's transition into the rollout buffer
        rollouts.insert(current_obs, action.data, reward, masks)

    # End of the advance loop

    # State value predicted from the last state of the rollout
    with torch.no_grad():
        next_value = actor_critic.get_value(
            rollouts.observations[-1]).detach()

    # Compute the discounted returns for every step of the rollout
    rollouts.compute_returns(next_value)

    # Update the network and the rollout buffer
    global_brain.update(rollouts)
    rollouts.after_update()

    # Progress log
    if j % 100 == 0:
        print("finished frames {}, mean/median reward {:.1f}/{:.1f}, min/max reward {:.1f}/{:.1f}".
              format(j*NUM_PROCESSES*NUM_ADVANCED_STEP,
                     final_rewards.mean(),
                     final_rewards.median(),
                     final_rewards.min(),
                     final_rewards.max()))

    # Periodic checkpoint of the network weights
    if j % 12500 == 0:
        torch.save(global_brain.actor_critic.state_dict(),
                   'weight_'+str(j)+'.pth')

# End of the main loop: save the final weights (the original saved the same
# file twice in a row)
torch.save(global_brain.actor_critic.state_dict(), 'weight_end.pth')


 10%|█         | 1/10 [04:58<44:42, 298.07s/it]

finished frames 0, mean/median reward 0.0/0.0, min/max reward 0.0/0.0


 40%|████      | 4/10 [19:13<28:46, 287.79s/it]

 Interrupted by the Keyboard
Killing PID 10182
 Interrupted by the Keyboard
Killing PID 10199


 50%|█████     | 5/10 [22:59<22:27, 269.43s/it]

 Interrupted by the Keyboard
Killing PID 10252


100%|██████████| 10/10 [23:04<00:00, 138.50s/it]


In [110]:
# Fluid (CFD) version: setup only

# Seed the torch random number generators
seed_num = 1
torch.manual_seed(seed_num)
if use_cuda:
    torch.cuda.manual_seed(seed_num)

# NOTE(review): seed_num is passed as the thread count here; this yields a
# single thread only because the seed is 1.
torch.set_num_threads(seed_num)
# Build the environments
Envs = makecase(NUM_PROCESSES, stride=100)  # stride is planned to be around 300 for real runs

# Create the Brain shared by all agents
n_out = Envs[0].action_space.shape[0]  # number of discrete actions
actor_critic = Net(n_out).to(device)  # move to the selected device
global_brain = Brain(actor_critic)

# Storage
obs_shape = Envs[0].observation_space.shape  # (3, 40, 12)
#obs_shape = (obs_shape[0] * NUM_STACK_FRAME,
#             *obs_shape[1:])  # (4, 84, 84)
# Only a single frame is used as the state, so current_obs holds the
# observation as-is (no frame stacking).

# current_obs has shape (NUM_PROCESSES, *obs_shape)
current_obs = torch.zeros(NUM_PROCESSES, *obs_shape).to(device)
rollouts = RolloutStorage(
    NUM_ADVANCED_STEP, NUM_PROCESSES, obs_shape)  # rollout buffer
episode_rewards = torch.zeros([NUM_PROCESSES, 1])  # reward accumulated in the current episode
final_rewards = torch.zeros([NUM_PROCESSES, 1])  # total reward of the last finished episode

# Initial state
obs = resets(Envs)
obs = torch.from_numpy(obs).float()  # (NUM_PROCESSES, *obs_shape)
current_obs = obs.to(device) # latest observation

# Store the current state as the first observation of the rollout buffer
rollouts.observations[0].copy_(current_obs)


tensor([[[[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          ...,
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],

         [[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          ...,
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],

         [[25.8500, 25.8500, 25.8500,  ..., 25.8500, 25.8500, 25.8500],
          [25.8500, 25.8500, 2

In [121]:

# Main loop, unrolled by hand: this cell runs a single advance step followed
# by one update, with the loop headers commented out.
#for j in tqdm(range(NUM_UPDATES)):
    # advance NUM_ADVANCED_STEP steps before each update
    #for step in range(NUM_ADVANCED_STEP):


j=0
step=0

# Choose the actions
with torch.no_grad():
    action = actor_critic.act(rollouts.observations[step])

cpu_actions = action.squeeze(1).cpu().numpy()  # tensor -> NumPy

# One step in every environment
obs, reward, done,runOK = steps(Envs, cpu_actions)

# Convert the rewards to a tensor and add them to the running episode totals;
# np.expand_dims adds a trailing axis so the shape becomes (N, 1)
reward = np.expand_dims(np.stack(reward), 1)
reward = torch.from_numpy(reward).float()
episode_rewards += reward

# Mask is 0 for environments that are done or whose run failed, 1 otherwise
masks = torch.FloatTensor(
    [[0.0] if done_ or not runOK_ else [1.0] for done_, runOK_ in zip(done,runOK)])
# Update the last-episode totals
final_rewards *= masks  # unchanged while running (x1), cleared when finished (x0)
# Adds 0 while running, the episode total when finished
final_rewards += (1 - masks) * episode_rewards


# Update the running episode totals
episode_rewards *= masks  # kept while running, reset to 0 when finished

# Move the masks to the selected device
masks = masks.to(device)

# Zero the current state of finished environments;
# masks is expanded from (N, 1) to (N, 1, 1, 1) for broadcasting
current_obs *= masks.unsqueeze(2).unsqueeze(2)

# No frame stacking: the newest observation replaces current_obs
obs = torch.from_numpy(obs).float()
current_obs = obs.to(device)  # store the latest observation

# Insert this step's transition into the rollout buffer
rollouts.insert(current_obs, action.data, reward, masks)

# ------------------

# End of the advance loop

# State value predicted from the last state of the rollout
with torch.no_grad():
    next_value = actor_critic.get_value(
        rollouts.observations[-1]).detach()
    
    
# Compute the discounted returns for every step of the rollout
rollouts.compute_returns(next_value)


# Update the network and the rollout buffer
global_brain.update(rollouts)
rollouts.after_update()

# Progress log
if j % 100 == 0:
    print("finished frames {}, mean/median reward {:.1f}/{:.1f}, min/max reward {:.1f}/{:.1f}".
          format(j*NUM_PROCESSES*NUM_ADVANCED_STEP,
                 final_rewards.mean(),
                 final_rewards.median(),
                 final_rewards.min(),
                 final_rewards.max()))

# Periodic checkpoint of the network weights
if j % 12500 == 0:
    torch.save(global_brain.actor_critic.state_dict(),
               'weight_'+str(j)+'.pth')

# Save the final weights
torch.save(global_brain.actor_critic.state_dict(), 'weight_end.pth')

 Interrupted by the Keyboard
Killing PID 8741
finished frames 0, mean/median reward 0.0/0.0, min/max reward 0.0/0.0


In [47]:
# 微妙。とりあえずは使わない方向。
class SubprocAircond():
    """Thin wrapper that drives a list of Aircond environments one by one.

    Despite the name, nothing runs in parallel: reset() and step() simply
    loop over the wrapped environments.
    """

    def __init__(self, envs):
        # Parallel execution is not attempted here.
        self.envs = envs
        # Spaces are taken from the first environment.
        self.observation_space = envs[0].observation_space
        self.action_space = envs[0].action_space

    def reset(self):
        """Reset every environment and return the list of observations."""
        return [env.reset() for env in self.envs]

    def step(self, actions):
        """Step every environment with its paired action.

        Returns:
            ``(Obs, Done, RunOK)``, three lists with one entry per
            environment. Environments whose ``step`` returns
            ``(obs, done, runOK)`` and those returning
            ``(obs, reward, done, runOK)`` are both accepted; in the latter
            case the reward is not part of this method's result.
        """
        Obs, Done, RunOK = [], [], []
        for env, action in zip(self.envs, actions):
            result = env.step(action)
            # Observation is always first; done and runOK are always the last
            # two items, whether or not a reward sits in between. The original
            # 3-way unpack raised ValueError on the 4-tuple form.
            Obs.append(result[0])
            Done.append(result[-2])
            RunOK.append(result[-1])
        return Obs, Done, RunOK

In [44]:
## この環境の部分を変える

class Aircond_old:
    '''Aircondのクラス'''
    def __init__(self, CASE, stride=500, xCells=40,
                         insert_list = [15,15,15,15,33,33,33,51,69,69,69,87,105,105,105,142,142,142,342,342,380,380]):
        """Set up the environment around an OpenFOAM case.

        Runs the case's ``Makemesh`` script, reads the cell count from the
        generated ``constant/polyMesh/neighbour`` file, defines the discrete
        action values and zero-filled space templates, and loads every file
        of the initial-time and ``system`` directories as a
        ``ParsedParameterFile`` stored under the file's own name.

        CASE        -- case object providing ``name`` and ``initialDir()``
        stride      -- amount the simulated time advances per ``step``
        xCells      -- first dimension used when observations are reshaped
        insert_list -- row indices where zero rows are inserted into the
                       observation (obstacle cells, per ``make_observation``).
                       NOTE: the default list is shared between instances;
                       this class only reads it.
        """
        self.CASE = CASE
        # polyMesh does not exist until the mesh has been generated.
        os.system(CASE.name + '/Makemesh')

        # The cell count is whatever digits appear in the 15 characters that
        # start at the "nCells" keyword of the neighbour file.
        with open (self.CASE.name + '/constant/polyMesh/neighbour') as mesh_file:
            mesh_text = mesh_file.read()
        keyword_at = mesh_text.find('nCells')
        self.nCells = int(re.sub(r'\D', '', mesh_text[keyword_at : keyword_at+15]))

        # Discrete action values: inlet speed, inlet angle and temperature.
        self.action_SPEED = np.array([0.1,0.3,0.5])
        self.action_DIRECTION = np.array([-k*np.pi/8 for k in (1, 2, 3)])
        self.action_TEMPERTURE = np.array([celsius+273.15 for celsius in (18, 22, 26)])
        self.action_space = np.zeros((27, 3), dtype=int)
        self.observation_space_ = np.zeros((self.nCells, 3), dtype=int)

        # 2-D observation template: three channels of shape (xCells, -1).
        self.xCells = xCells
        self.insert_list = insert_list
        template = np.zeros((self.nCells+len(self.insert_list), 3), dtype=int)
        self.observation_space = np.array(
            [template[:, channel].reshape(self.xCells, -1) for channel in range(3)])

        self.stride = stride  # width of one step
        # Time elapsed since stepping began (pre-run time is not counted).
        self.present_time = 0
        # Times mirrored into OpenFOAM's controlDict.
        self.startTime = 0
        self.endTime = copy(self.stride)
        # Time at which an episode ends.
        self.end = 3000

        # Collect the plain files of each dictionary directory.
        self.initialDir = self.CASE.initialDir()+'/'
        self.constant = self.CASE.name + "/constant/"
        self.system = self.CASE.name + "/system/"
        self.initialDir_file = [entry for entry in os.listdir(self.initialDir)
                                if os.path.isfile(self.initialDir + entry)]
        self.constant_file = [entry for entry in os.listdir(self.constant)
                              if os.path.isfile(self.constant + entry)]
        self.system_file = [entry for entry in os.listdir(self.system)
                            if os.path.isfile(self.system + entry)]

        # Expose each parsed dictionary as an attribute named after its file
        # (files under constant/ are listed above but not parsed).
        for file_name in self.initialDir_file:
            vars(self)[file_name] = ParsedParameterFile(self.initialDir + file_name)

        for file_name in self.system_file:
            vars(self)[file_name] = ParsedParameterFile(self.system + file_name)
            
    def initial_to_float(self, numpy_Parsed_value):
        '''uniformをnp.arrayに変換'''
        numpy_Parsed_value = np.array(numpy_Parsed_value)
        if numpy_Parsed_value.ndim==0:
            Parsed_raw = str(numpy_Parsed_value.all())
            Parsed_str = Parsed_raw[8:].strip('()').split(' ')
            Parsed_int = np.array(list(map(float,Parsed_str)))
            #Parsed = np.tile(Parsed_int,(self.nCells,1))
        return Parsed_int
    
    def initial_to_array(self, numpy_Parsed_value):
        '''uniformをnCellの数だけnp.arrayに変換'''
        numpy_Parsed_value = np.array(numpy_Parsed_value)
        if numpy_Parsed_value.ndim==0:
            Parsed_raw = str(numpy_Parsed_value.all())
            Parsed_str = Parsed_raw[8:].strip('()').split(' ')
            Parsed_int = np.array(list(map(float,Parsed_str)))
            Parsed = np.tile(Parsed_int,(self.nCells,1))
        return Parsed

    def make_observation_old(self,Dir):
        '''Read ``U`` and ``T`` from directory ``Dir`` as one table.

        Returns an array with one row per cell and three columns: the first
        two velocity components followed by the temperature.
        '''
        velocity = np.array(ParsedParameterFile(Dir + '/U').content['internalField'])
        temperature = np.array(ParsedParameterFile(Dir + '/T').content['internalField'])
        if velocity.ndim == 0:
            # Uniform fields are expanded to one row per cell.
            velocity = self.initial_to_array(velocity)
            temperature = self.initial_to_array(temperature)
        planar_velocity = np.delete(velocity, 2, axis=1)  # drop the third component
        temperature_column = temperature.reshape((-1, 1), order='F')
        return np.concatenate((planar_velocity, temperature_column), axis=1)
    
    def make_observation_onerow(self,Dir):
        '''Read ``U`` and ``T`` from directory ``Dir`` as a single column.

        The result stacks, in this order, the first velocity component of
        every cell, the second velocity component of every cell and the
        temperatures, each as an ``(n, 1)`` column.
        '''
        velocity = np.array(ParsedParameterFile(Dir + '/U').content['internalField'])
        temperature = np.array(ParsedParameterFile(Dir + '/T').content['internalField'])
        if velocity.ndim == 0:
            # Uniform fields are expanded to one row per cell.
            velocity = self.initial_to_array(velocity)
            temperature = self.initial_to_array(temperature)
        temperature_column = temperature.reshape(-1, 1)
        pieces = [velocity[:, 0].reshape(-1, 1),
                  velocity[:, 1].reshape(-1, 1),
                  temperature_column]
        return np.concatenate(pieces, axis=0)
    
    def make_observation(self,Dir):
        '''Read ``U`` and ``T`` from directory ``Dir`` as a 2-D observation.

        self.xCells      : number of cells in the x direction
        self.insert_list : positions holding obstacles, to be filled with 0

        Returns an array of three channels (first velocity component,
        second velocity component, temperature), each reshaped to
        ``(self.xCells, -1)``.
        '''
        velocity = np.array(ParsedParameterFile(Dir + '/U').content['internalField'])
        temperature = np.array(ParsedParameterFile(Dir + '/T').content['internalField'])
        if velocity.ndim == 0:
            # Uniform fields are expanded to one row per cell.
            velocity = self.initial_to_array(velocity)
            temperature = self.initial_to_array(temperature)
        planar_velocity = np.delete(velocity, 2, axis=1)  # drop the third component
        temperature_column = temperature.reshape((-1, 1), order='F')
        table = np.concatenate((planar_velocity, temperature_column), axis=1)
        # Insert all-zero rows at the obstacle positions.
        padded = np.insert(table, self.insert_list, [0,0,0], axis=0)
        channels = [padded[:, column].reshape(self.xCells, -1) for column in range(3)]
        return np.array(channels)
    
    def make_action(self):
        '''actionの設定'''
        Action = np.empty((0,3),float)
        for i in range(len(self.action_SPEED)):
            for j in range(len(self.action_DIRECTION)):
                for k in range(len(self.action_TEMPERTURE)):
                    Ux = self.action_SPEED[i]*np.cos(self.action_DIRECTION[j])
                    Uy = self.action_SPEED[i]*np.sin(self.action_DIRECTION[j])
                    Act = np.array([[Ux,Uy,self.action_TEMPERTURE[k]]])
                    Action = np.append(Action,Act,axis=0)
                    
        return Action
    
    def getParsed(self,time_step):
        '''Return ``[T, U]`` parsed from the ``time_step`` directory of the case.'''
        step_dir = self.CASE.name + '/' + str(time_step)
        return [ParsedParameterFile(step_dir + '/T'),
                ParsedParameterFile(step_dir + '/U')]
    
    
    def getParsedList(self,first_step, last_step, write_step,):
        '''各time_stepのParsedParameterFileを取得'''
        TU_list = []
        for stp in range(first_step, last_step, write_step):
            T = ParsedParameterFile(self.CASE.name + '/' + str(stp) + '/T')
            U = ParsedParameterFile(self.CASE.name + '/' + str(stp) + '/U')
            TU_list.append([T,U])
        return TU_list
    
    # To be rewritten in Cython later.
    def calc_PMV(self, TA=20,VA=0.3,TR=20,RH=50,AL=1,CLO=1):
        '''PMVとPPDを計算'''
        #AL = 1  # 活動量[met]
        #CLO = 1 # 着衣量[clo]
        #TA = 20  #  温度[℃]
        #TR = 20  # MRT[℃]
        #VA = 0.3  # 流速[m/s]
        #RH = 50  # 相対湿度[%]
        #
        #***************************************************
        # 外部仕事 W＝0 [W/㎡]とする。
        #***************************************************
        # PMV 計算準備
        #
        M = AL * 58.15
        LCL = CLO
        W = 0
        #PA = (RH / 100 * np.exp(18.6686 - 4030.18 / (TA + 235))) / 0.00750062
        PPK = 673.4 - 1.8 * TA
        PPA = 3.2437814 + 0.00326014 * PPK + 2.00658 * 1E-9 * PPK * PPK * PPK
        PPB = (1165.09 - PPK) * (1 + 0.00121547 * PPK)
        PA = RH / 100 * 22105.8416 / np.exp(2.302585 * PPK * PPA / PPB) * 1000
        EPS = 1E-5
        MW = M - W
        # FCL＝着衣表面積／裸体表面積の比
        if LCL > 0.5:
            FCL = 1.05 + 0.1 * LCL
        else:
            FCL = 1 + 0.2 * LCL
        # 衣服表面温度TCLの初期値設定
        TCL = TA
        TCLA = TCL
        NOI = 1
        # 着衣表面温度の計算
        while True:
            TCLA = 0.8 * TCLA + 0.2 * TCL
            HC = 12.1 * np.sqrt(VA)
            if 2.38 * np.sqrt(np.sqrt(abs(TCL - TA))) > HC:
                HC = 2.38 * np.sqrt(np.sqrt(abs(TCL - TA)))
            TCL = 35.7 - 0.028 * MW - 0.155 * LCL * (3.96 * 1E-8 * FCL * ((TCLA + 273) ** 4 - (TR + 273) ** 4) + FCL * HC * (TCLA - TA))
            NOI = NOI + 1
            if NOI > 150:
                #PMV = 999990.999
                PMB = 3.0
                PPD = 100
                return (PMV,PPD)
            if not abs(TCLA - TCL) > EPS:
                break
        #PMVの計算
        PM1 = 3.96 * 1E-8 * FCL * ((TCL + 273) ** 4 - (TA + 273) ** 4)
        PM2 = FCL * HC * (TCL - TA)
        PM3 = 0.303 * np.exp(-0.036 * M) + 0.028
        if MW > 58.15:
            PM4 = 0.42 * (MW - 58.15)
        else:
            PM4 = 0
        PMV = PM3 * (MW - 3.05 * 0.001 * (5733 - 6.99 * MW - PA) - PM4 - 1.7 * 1E-5 * M * (5867 - PA) - 0.0014 * M * (34 - TA) - PM1 - PM2)
            #PRINT PMV
        if abs(PMV) > 3:
            #PMV = 999990.999
            PMV = 3.0
            PPD = 100
            return (PMV,PPD)
        
        PPD = 100 - 95 * np.exp(-0.0335 * PMV ** 4 - 0.2179 * PMV ** 2)
        
        return (PMV,PPD)
    
    def calc_MRT(self, T_Parsed):
        '''MRTを計算'''
        
        T_wall_list = np.array([])
        if np.array(T_Parsed['internalField']).ndim==0:  # time_step=0
            for boundary in list(T_Parsed['boundaryField']):
                if T_Parsed['boundaryField'][boundary]['type']=='zeroGradient' or \
                T_Parsed['boundaryField'][boundary]['type']=='empty' or \
                    T_Parsed['boundaryField'][boundary]['type']=='fixedValue':
                    T_wall = np.array([])
                else:
                    numpy_Parsed_value = np.array(T_Parsed['boundaryField'][boundary]['value'])
                    T_wall = self.initial_to_float(numpy_Parsed_value)
                T_wall_list = np.append(T_wall_list, T_wall)
                
        else:
            for boundary in list(T_Parsed['boundaryField']):
                if T_Parsed['boundaryField'][boundary]['type']=='fixedValue':
                    numpy_Parsed_value = np.array(T_Parsed['boundaryField'][boundary]['value'])
                    T_wall = self.initial_to_float(numpy_Parsed_value)
                elif T_Parsed['boundaryField'][boundary]['type']=='zeroGradient' or \
                T_Parsed['boundaryField'][boundary]['type']=='empty':
                    T_wall = np.array([])
                else:
                    T_wall = np.array(T_Parsed['boundaryField'][boundary]['value'])
                    if T_wall.ndim==0:
                        T_wall = self.initial_to_float(T_wall)
                T_wall_list = np.append(T_wall_list, T_wall)
        return np.average(T_wall_list)
    
    def Celsius(self, T):
        CelsiusT = T - 273.15
        return CelsiusT
    
    def Celsius_(self, T):
        '''セルシウス℃に変換'''
        if np.array(T).size==1:
            return self.Celsius(T)
        else:
            Celsiuss = np.frompyfunc(self.Celsius,1,1)  # リストに適用可にする
            return Celsiuss(T)
        
    def UScalar(self, U):
        '''Uをスカラーに変換'''
        if np.array(U).size<=3:
            return np.array([np.sqrt(U[0]**2 + U[1]**2)])
        else:
            return np.sqrt(U[:,0]**2 + U[:,1]**2)
        
    def calc_PMV_all(self, TU_Parsed):
        '''PMVを一つのtime_stepで全点計算'''
        
        T_Parsed,U_Parsed = TU_Parsed
        T = np.array(T_Parsed['internalField'])
        U = np.array(U_Parsed['internalField'])
        # time_step==0の場合
        # 一つの値に変換する
        if T.ndim==0 or U.ndim==0:
            T = self.initial_to_float(T)
            U = self.initial_to_float(U)
        # Uを速さに変換
        Us = self.UScalar(U)
        MRT = self.calc_MRT(T_Parsed)
        # TとMRTをセルシウス温度に変換
        Tc = list(self.Celsius_(T))
        MRTc = self.Celsius_(MRT)

        length = len(T)
        # ループを早くするため、外に出す。
        PMV = []
        PPD = []
        PMVappend = PMV.append
        PPDappend = PPD.append
        for i in range(length):
            pmv,ppd = self.calc_PMV(TA=Tc[i],VA=Us[i],TR=MRTc,RH=50,AL=1,CLO=1)
            PMVappend(pmv)
            PPDappend(ppd)
        return [PMV,PPD]
    
    def header(self, time_step, filename):
        '''headerファイルを作成'''
        header = """/*--------------------------------*- C++ -*----------------------------------*\
=========                 |
  \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox
   \\    /   O peration     | Website:  https://openfoam.org
    \\  /    A nd           | Version:  6
     \\/     M anipulation  |
\*---------------------------------------------------------------------------*/
FoamFile
{{
    version     2.0;
    format      ascii;
    class       volScalarField;
    location    "{}";
    object      {};
}}
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
""".format(time_step, filename)
        return header
    
    def internal(self, list_internal):
        '''internalFieldの値の作成'''
        if len(list_internal)==1:
            internal = """
internalField   uniform {};""".format(list_internal[0])
        else:
            str_= np.frompyfunc(str,1,1)
            str_internal = '\n'.join(str_(list_internal))
            internal = """
internalField   nonuniform List<scalar> 
{}
(
{}
)
;
""".format(self.nCells, str_internal)
        return internal
    
    def makePMVFile(self,time_step):
        '''PMVとPPDファイルを書き込む'''
        
        path_pmv = self.CASE.name + '/' + str(time_step) + '/PMV' # 書き込むパス
        path_ppd = self.CASE.name + '/' + str(time_step) + '/PPD'
        
        demensions = """
dimensions      [0 0 0 0 0 0 0];
"""
        
        boundary = """
boundaryField
{
    ".*"
    {
        type            zeroGradient;
    }
}


// ************************************************************************* //
"""
        # header, dimensions, internal, boundaryの順に書き込む
        f = open(path_pmv, 'w') # ファイルを開く(該当ファイルがなければ新規作成)
        g = open(path_ppd, 'w')
        f.write(self.header(time_step,"PMV")) # headerを記載する
        g.write(self.header(time_step,"PPD"))
        f.write(demensions) # dimensionsを記載する
        g.write(demensions)
        # internalFieldの計算
        TU_Parsed = self.getParsed(time_step)
        PMV,PPD = self.calc_PMV_all(TU_Parsed)
        internal_PMV = self.internal(PMV)
        internal_PPD = self.internal(PPD)
        f.write(internal_PMV)  
        g.write(internal_PPD)
        f.write(boundary)
        g.write(boundary)
        f.close() 
        g.close()

        
    def makePMVList(self,first_step, last_step, write_step):
        '''任意の範囲でPMVファイルを作成'''
        for stp in range(first_step, last_step, write_step):
            self.makePMVFile(stp)
            
        
    def meshNumberFile(self,time_step):
        '''メッシュの並びを確認する'''
        path_mesh = self.CASE.name + '/' + str(time_step) + '/Meshnumber' # 書き込むパス


        demensions = """
dimensions      [0 0 0 0 0 0 0];
"""
        boundary = """
boundaryField
{
    ".*"
    {
        type            zeroGradient;
    }
}


// ************************************************************************* //
"""
        f = open(path_mesh, 'w') # ファイルを開く(該当ファイルがなければ新規作成)
        f.write(self.header(time_step,"PMV")) # headerを記載する
        f.write(demensions) # dimensionsを記載する
        mesh_list = [x for x in range(1,self.nCells+1)]
        internal_mesh = self.internal(mesh_list)
        f.write(internal_mesh)  
        f.write(boundary)
        f.close() 
            
    def calc_ADPI(self,TU_Parsed,occupied_zone_cell):
        '''ADPIを計算する'''
        
        # occupied_zone_cellはaircond5の場合は1~340までのセルが居住域
        T_Parsed,U_Parsed = TU_Parsed
        T = np.array(T_Parsed['internalField'])
        U = np.array(U_Parsed['internalField'])
        # time_step==0の場合
        if T.ndim==0 or U.ndim==0:
            T = self.initial_to_float(T)
            U = self.initial_to_float(U)
        
        Tc = np.average(T)  # 室内の平均温度
        Us = self.UScalar(U)  # 流速
        theta = (T - Tc) - 8.0*(Us - 0.15)  # 有効ドラフト温度
        
        satisfy_theta = np.where((theta > -1.5) & (theta < 1), 1, 0)
        satisfy_Us = np.where(Us < 0.35,1, 0)  # 条件を満たすものを1,満たさないものを0
        satisfy_all = satisfy_theta + satisfy_Us
        satisfy = satisfy_all[:occupied_zone_cell]
        nCells = satisfy.size
        num_satisfy = np.sum(satisfy == 2)
        ADPI = num_satisfy/nCells*100
        
        return (ADPI, theta)
    
    def calc_EUC(self,T_Parsed, occupied_zone_cell,last_cell):
        '''EUCを計算する'''
        
        T = np.array(T_Parsed['internalField'])
        T0 = self.initial_to_float(T_Parsed['boundaryField']['inlet']['value'])[0] # 給気温度

        if T.ndim==0:
            T = self.initial_to_float(T)[0]
            Toz = T
            Tiz = T
        else:
            Toz = np.average(T[occupied_zone_cell:last_cell])  # 居住域外の平均温度  
            Tiz = np.average(T[:occupied_zone_cell])  # 居住域内の平均温度
        EUC = (Toz-T0) / (Tiz-T0) * 100
        return EUC
        
    def getPMVList(self, first_step, last_step, write_step):
        '''任意の範囲のPMVの平均値ファイルを取得'''
        
        # ループを早くするため、外に出す。
        PMV_list = []
        PPD_list = []
        PMVappend = PMV_list.append
        PPDappend = PPD_list.append
        for stp in range(first_step, last_step, write_step):
            TU_Parsed = self.getParsed(stp)
            PMV,PPD = self.calc_PMV_all(TU_Parsed)
            pmv = np.average(np.array(PMV))
            ppd = np.average(np.array(PPD))
            PMVappend(pmv)
            PPDappend(ppd)
        return [PMV_list, PPD_list]
    
    def getPMVerrorList(self, first_step, last_step, write_step):
        '''任意の範囲のPMVの空間平均2乗誤差を取得'''
        
        # 工事中
        PMV_list = []
        PMVappend = PMV_list.append
        for stp in range(first_step, last_step, write_step):
            TU_Parsed = self.getParsed(stp)
            PMV,PPD = self.calc_PMV_all(TU_Parsed)
            
            pmv = np.average(np.array(PMV))
            PMVappend(pmv)
        return [PMV_list, PPD_list]
    
    def getADPIList(self, first_step, last_step, write_step,occupied_zone_cell=342):
        '''任意の範囲のADPIの値を取得'''
        
        ADPI_list = []
        ADPIappend = ADPI_list.append
        for stp in range(first_step, last_step, write_step):
            TU_Parsed = self.getParsed(stp)
            adpi,theta = self.calc_ADPI(TU_Parsed, occupied_zone_cell)
            ADPIappend(adpi)
        return ADPI_list
    
    def getEUCList(self, first_step, last_step, write_step,
                    occupied_zone_cell=342, last_cell=100000):
        '''任意の範囲のEUCの値を算出'''
        
        EUC_list = []
        EUCappend = EUC_list.append
        for stp in range(first_step, last_step, write_step):
            T_Parsed,U_Parsed = self.getParsed(stp)
            euc = self.calc_EUC(T_Parsed, occupied_zone_cell, last_cell)
            EUCappend(euc)
        return EUC_list
    
    def getTUList(self, first_step, last_step, write_step):
        '''任意の範囲のTとUの平均値を取得'''
        
        T_list = []
        U_list = []
        MRT_list = []
        Tappend = T_list.append
        Uappend = U_list.append
        MRTappend = MRT_list.append
        for stp in range(first_step, last_step, write_step):
            T_Parsed, U_Parsed = self.getParsed(stp)
            T = np.array(T_Parsed['internalField'])
            U = np.array(U_Parsed['internalField'])
            # time_step==0の場合
            if T.ndim==0 or U.ndim==0:
                T = self.initial_to_float(T)
                U = self.initial_to_float(U)
            # Uを速さに変換
            T = np.average(T)
            Us = np.average(np.array(self.UScalar(U)))
            MRT = np.average(np.array(self.calc_MRT(T_Parsed)))
            # TとMRTをセルシウス温度に変換
            Tc = self.Celsius(T)
            MRTc = self.Celsius(MRT)
            Tappend(Tc)
            Uappend(Us)
            MRTappend(MRTc)
        return [T_list,U_list,MRT_list]
        
        
        
    def change_control(self,control):
        if control == 1:
            self.blockMeshDict['blocks'][2] = Vector(20,10,1)
            self.blockMeshDict.writeFile()
            self.controlDict['deltaT'] = 0.02
        if control == 2:
            self.blockMeshDict['blocks'][2] = Vector(40,20,1)
            self.blockMeshDict.writeFile()
            self.controlDict['deltaT'] = 0.02
        if control == 3:
            self.blockMeshDict['blocks'][2] = Vector(20,10,1)
            self.blockMeshDict.writeFile()
            self.controlDict['deltaT'] = 0.01
        if control == 4:
            self.blockMeshDict['blocks'][2] = Vector(40,20,1)
            self.blockMeshDict.writeFile()
            self.controlDict['deltaT'] = 0.01
            
    def write_interval(self, writeInterval):
        self.controlDict['writeInterval'] = writeInterval
        
        
    def reset(self):
        '''Reset the environment and return the initial observation.

        Rewinds the time bookkeeping, writes start/end time to the case's
        controlDict, re-runs ``Makemesh``, writes a random uniform initial
        temperature, and rebuilds the action table and the observation.
        '''
        # Rewind the time bookkeeping.
        self.present_time = 0
        self.startTime = 0
        self.endTime = copy(self.stride)

        # Mirror it into controlDict.
        control = ParsedParameterFile(self.CASE.controlDict())
        control['startTime'] = self.startTime
        control['endTime'] = self.endTime
        control.writeFile()

        os.system(self.CASE.name + '/Makemesh')

        # Random initial condition: an integer drawn from [26+273, 35+273),
        # i.e. the upper bound itself is never produced.
        initial_T = ParsedParameterFile(self.CASE.initialDir() + '/T')
        initial_T['internalField'].setUniform(np.random.randint(26+273,35+273))
        initial_T.writeFile()

        # Rebuild the action table and the observation.
        self.action_space= self.make_action()
        self.observation = self.make_observation(self.CASE.initialDir())
        return self.observation
    
    def step_old(self, action):
        '''ステップを進める'''
        #clDict = ParsedParameterFile(self.CASE.controlDict())      
        if self.present_time >= self.end:
            done = True
            runOK = 'end'
        else:
            done = False
            
            # actionに従った、境界条件を設定
            # action is 0~26
            U_latest = ParsedParameterFile(self.CASE.latestDir() + '/U')
            T_latest = ParsedParameterFile(self.CASE.latestDir() + '/T')
            self.act = self.action_space[action]
            U_latest['boundaryField']['inlet']['value'].setUniform(Vector(self.act[0],self.act[1],0))
            U_latest.writeFile()
            T_latest['boundaryField']['inlet']['value'].setUniform(self.act[2])
            T_latest.writeFile()
            
            # OpenFOAMのコマンドを実行
            args=shlex.split("buoyantPimpleFoam -case " + self.CASE.name)
            buoyant=BasicRunner(args,silent=True)
            self.summary=buoyant.start()
            runOK = buoyant.runOK()
            
            #os.system("buoyantBoussinesqPimpleFoam")
            
            # clDictのコントロール
            self.present_time += self.stride
            clDict = ParsedParameterFile(self.CASE.controlDict())
            self.startTime += self.stride
            self.endTime += self.stride
            clDict['startTime'] = self.startTime
            clDict['endTime'] = self.endTime
            clDict.writeFile()
            
            self.startTime = clDict['startTime']
            self.endTime = clDict['endTime']
            
            self.observation = self.make_observation(self.CASE.latestDir())
            
        return (self.observation, done, runOK)
    
    
    def step(self, action, reward='PMV'):
        '''ステップを進める'''
        #clDict = ParsedParameterFile(self.CASE.controlDict())      
        if self.present_time >= self.end:
            done = True
            runOK = 'end'
        else:
            done = False
            
            # actionに従った、境界条件を設定
            # action is 0~26
            U_latest = ParsedParameterFile(self.CASE.latestDir() + '/U')
            T_latest = ParsedParameterFile(self.CASE.latestDir() + '/T')
            self.act = self.action_space[action]
            U_latest['boundaryField']['inlet']['value'].setUniform(Vector(self.act[0],self.act[1],0))
            U_latest.writeFile()
            T_latest['boundaryField']['inlet']['value'].setUniform(self.act[2])
            T_latest.writeFile()
            
            # OpenFOAMのコマンドを実行
            args=shlex.split("buoyantPimpleFoam -case " + self.CASE.name)
            buoyant=BasicRunner(args,silent=True)
            self.summary=buoyant.start()
            runOK = buoyant.runOK()
            
            #os.system("buoyantBoussinesqPimpleFoam")
            
            # clDictのコントロール
            self.present_time += self.stride
            clDict = ParsedParameterFile(self.CASE.controlDict())
            self.startTime += self.stride
            self.endTime += self.stride
            clDict['startTime'] = self.startTime
            clDict['endTime'] = self.endTime
            clDict.writeFile()
            
            self.startTime = clDict['startTime']
            self.endTime = clDict['endTime']
            
            self.observation = self.make_observation(self.CASE.latestDir())
            
        return (self.observation, done, runOK)
        

In [19]:
# Fluid-simulation version of the A2C training script.
# Relies on names defined in other notebook cells: use_cuda, device, makecase,
# resets, steps, Net, Brain, RolloutStorage, NUM_PROCESSES, NUM_ADVANCED_STEP,
# NUM_UPDATES.

# Seed configuration
seed_num = 1
torch.manual_seed(seed_num)
if use_cuda:
    torch.cuda.manual_seed(seed_num)

# Build the execution environment
# NOTE(review): the thread count is set from seed_num (1); confirm that reusing
# the seed variable here is intended.
torch.set_num_threads(seed_num)
# Build the execution environments
Envs = makecase(NUM_PROCESSES, stride=100)  # the stride is eventually meant to be around 300

# Create the Brain shared by all agents
n_out = Envs[0].action_space.shape[0]  # number of action kinds (27 per the original note)
actor_critic = Net(n_out).to(device)  # move to the GPU/selected device
global_brain = Brain(actor_critic)

# Create the storage variables
obs_shape = Envs[0].observation_space.shape  # (3, 40, 12) per the original note
#obs_shape = (obs_shape[0] * NUM_STACK_FRAME,
#             *obs_shape[1:])  # (4, 84, 84)
# Only a single state is used (no frame stacking), so current_obs holds the
# observation as-is.

# torch.Size([16, 3, 40, 12]) per the original note
current_obs = torch.zeros(NUM_PROCESSES, *obs_shape).to(device)
rollouts = RolloutStorage(
    NUM_ADVANCED_STEP, NUM_PROCESSES, obs_shape)  # rollouts object
episode_rewards = torch.zeros([NUM_PROCESSES, 1])  # reward of the episode in progress
final_rewards = torch.zeros([NUM_PROCESSES, 1])  # total reward of the last finished episode

# Start from the initial state
obs = resets(Envs)
obs = torch.from_numpy(obs).float()  # torch.Size([16, 3, 40, 12]) per the original note
current_obs = obs.to(device) # store the latest obs (original note mentions frame slot 4; no stacking here)

# Save the current state as the first state of the rollouts object
rollouts.observations[0].copy_(current_obs)

# Main loop
for j in tqdm(range(NUM_UPDATES)):
    # Process NUM_ADVANCED_STEP steps per update
    for step in range(NUM_ADVANCED_STEP):

        # Choose the actions
        with torch.no_grad():
            action = actor_critic.act(rollouts.observations[step])
        
        cpu_actions = action.squeeze(1).cpu().numpy()  # tensor -> NumPy
        
        # Run one step in every environment.
        # NOTE(review): the original note gives the obs size as (16, 1, 84, 84),
        # which looks like a leftover from an image-based example.
        obs, reward, done,runOK = steps(Envs, cpu_actions)
        
        # Convert the reward to a tensor and add it to the episode total
        # (original note: size (16,) is expanded to (16, 1))
        reward = np.expand_dims(np.stack(reward), 1)
        reward = torch.from_numpy(reward).float()
        episode_rewards += reward
        
        # Per environment: mask is 0 when done or when the run failed, else 1
        masks = torch.FloatTensor(
            [[0.0] if done_ or not runOK_ else [1.0] for done_, runOK_ in zip(done,runOK)])
        # Update the total reward of the last episode:
        final_rewards *= masks  # multiplied by 1 while running (unchanged), by 0 on done (reset)
        # adds 0 while running, adds episode_rewards on done
        final_rewards += (1 - masks) * episode_rewards
        
        
        # Update the running episode total
        episode_rewards *= masks  # unchanged while running (mask 1), zeroed on done
        
        # Move masks to the device
        masks = masks.to(device)
        
        # Zero the current state on done
        # (mask reshaped from torch.Size([16, 1]) to torch.Size([16, 1, 1, 1]) for the product)
        # NOTE(review): current_obs is reassigned a few lines below, so this
        # masking does not affect what is stored.
        current_obs *= masks.unsqueeze(2).unsqueeze(2)
        
        # Originally "stack the frames"; here the new obs simply replaces the old
        # torch.Size([16, 1, 40, 12]) per the original note
        obs = torch.from_numpy(obs).float()
        current_obs = obs.to(device)  # store the latest obs
        
        # Insert this step's transition into the rollout storage
        rollouts.insert(current_obs, action.data, reward, masks)
        
    # End of the advanced-step for loop

    # Estimate the state value expected from the final advanced step
    with torch.no_grad():
        next_value = actor_critic.get_value(
            rollouts.observations[-1]).detach()
        
        
    # Compute the discounted return for every step and update rollouts.returns
    rollouts.compute_returns(next_value)
    
    
    # Update the network and the rollout storage
    global_brain.update(rollouts)
    rollouts.after_update()
    
    # Log: print intermediate progress
    if j % 100 == 0:
        print("finished frames {}, mean/median reward {:.1f}/{:.1f}, min/max reward {:.1f}/{:.1f}".
              format(j*NUM_PROCESSES*NUM_ADVANCED_STEP,
                     final_rewards.mean(),
                     final_rewards.median(),
                     final_rewards.min(),
                     final_rewards.max()))
    
    # Save the network weights
    if j % 12500 == 0:
        torch.save(global_brain.actor_critic.state_dict(),
                   'weight_'+str(j)+'.pth')

# End of the main loop: save the final weights
torch.save(global_brain.actor_critic.state_dict(), 'weight_end.pth')

# NOTE(review): duplicate of the save just above; the same file is written twice.
torch.save(global_brain.actor_critic.state_dict(), 'weight_end.pth')


TypeError: __init__() got an unexpected keyword argument 'x_direction_Cells'

In [110]:
# Fluid-simulation version: setup only (the training loop is run separately).
# Relies on names defined in other notebook cells: use_cuda, device, makecase,
# resets, Net, Brain, RolloutStorage, NUM_PROCESSES, NUM_ADVANCED_STEP.

# Seed configuration
seed_num = 1
torch.manual_seed(seed_num)
if use_cuda:
    torch.cuda.manual_seed(seed_num)

# Build the execution environment
# NOTE(review): the thread count is set from seed_num (1); confirm that reusing
# the seed variable here is intended.
torch.set_num_threads(seed_num)
# Build the execution environments
Envs = makecase(NUM_PROCESSES, stride=100)  # the stride is eventually meant to be around 300

# Create the Brain shared by all agents
n_out = Envs[0].action_space.shape[0]  # number of action kinds (27 per the original note)
actor_critic = Net(n_out).to(device)  # move to the GPU/selected device
global_brain = Brain(actor_critic)

# Create the storage variables
obs_shape = Envs[0].observation_space.shape  # (3, 40, 12) per the original note
#obs_shape = (obs_shape[0] * NUM_STACK_FRAME,
#             *obs_shape[1:])  # (4, 84, 84)
# Only a single state is used (no frame stacking), so current_obs holds the
# observation as-is.

# torch.Size([16, 3, 40, 12]) per the original note
current_obs = torch.zeros(NUM_PROCESSES, *obs_shape).to(device)
rollouts = RolloutStorage(
    NUM_ADVANCED_STEP, NUM_PROCESSES, obs_shape)  # rollouts object
episode_rewards = torch.zeros([NUM_PROCESSES, 1])  # reward of the episode in progress
final_rewards = torch.zeros([NUM_PROCESSES, 1])  # total reward of the last finished episode

# Start from the initial state
obs = resets(Envs)
obs = torch.from_numpy(obs).float()  # torch.Size([16, 3, 40, 12]) per the original note
current_obs = obs.to(device) # store the latest obs (original note mentions frame slot 4; no stacking here)

# Save the current state as the first state of the rollouts object
rollouts.observations[0].copy_(current_obs)


tensor([[[[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          ...,
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],

         [[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          ...,
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],

         [[25.8500, 25.8500, 25.8500,  ..., 25.8500, 25.8500, 25.8500],
          [25.8500, 25.8500, 2

In [121]:

# Execution loop, unrolled by hand for debugging: this cell performs one
# iteration (j = 0, step = 0) of what would otherwise be the nested loops
#     for j in tqdm(range(NUM_UPDATES)):
#         for step in range(NUM_ADVANCED_STEP):
j = 0
step = 0

# Pick an action from the stored observation, without tracking gradients.
with torch.no_grad():
    action = actor_critic.act(rollouts.observations[step])

# Tensor -> NumPy so the actions can be handed to the environments.
cpu_actions = action.squeeze(1).cpu().numpy()

# Advance all environments by one step in parallel.
# NOTE(review): the original comment gave the returned obs size as
# (16, 1, 84, 84), which looks stale for this environment -- confirm.
obs, reward, done, runOK = steps(Envs, cpu_actions)

# Stack the per-environment rewards, add a trailing axis (original note:
# (16,) -> (16, 1)), convert to a float tensor and add it to the running
# reward of the current trial.
reward = torch.from_numpy(np.expand_dims(np.stack(reward), 1)).float()
episode_rewards.add_(reward)

# Per-environment mask: 1.0 while the trial continues, 0.0 when the
# environment reported done or its run was not OK.
masks = torch.FloatTensor(
    [[1.0] if (not done_ and runOK_) else [0.0]
     for done_, runOK_ in zip(done, runOK)])

# Update the total reward of the last finished trial:
# continuing environments keep their value (multiplied by 1 and 0 added);
# finished ones are reset to 0 and then receive episode_rewards.
final_rewards.mul_(masks)
final_rewards.add_((1 - masks) * episode_rewards)

# Reset the running reward of finished trials (mask 0); keep the others.
episode_rewards.mul_(masks)

# Move the masks to `device`.
masks = masks.to(device)

# Zero the current state of finished environments. The mask is expanded
# from (N, 1) to (N, 1, 1, 1) so that it broadcasts over the observation.
# NOTE(review): current_obs is rebound to the fresh observation just below,
# so this masked tensor is not the one passed to rollouts.insert -- confirm
# whether the masking is still needed now that frame stacking is gone.
current_obs.mul_(masks[:, :, None, None])

# No frame stacking: the newest observation becomes the current state.
obs = torch.from_numpy(obs).float()
current_obs = obs.to(device)

# Insert this step's transition into the rollout buffer.
rollouts.insert(current_obs, action.data, reward, masks)

# ------------------
# End of the (unrolled) inner "advanced step" loop.

# Estimate the state value from the last stored observation.
with torch.no_grad():
    next_value = actor_critic.get_value(rollouts.observations[-1]).detach()

# Compute the discounted returns for every step and store them in rollouts.
rollouts.compute_returns(next_value)

# Update the network, then let the rollout buffer prepare for the next round.
global_brain.update(rollouts)
rollouts.after_update()

# Log progress.
if j % 100 == 0:
    print("finished frames {}, mean/median reward {:.1f}/{:.1f}, min/max reward {:.1f}/{:.1f}".
          format(j * NUM_PROCESSES * NUM_ADVANCED_STEP,
                 final_rewards.mean(),
                 final_rewards.median(),
                 final_rewards.min(),
                 final_rewards.max()))

# Periodically save the network weights.
if j % 12500 == 0:
    torch.save(global_brain.actor_critic.state_dict(),
               'weight_' + str(j) + '.pth')

# End of the execution loop: save the final weights.
torch.save(global_brain.actor_critic.state_dict(), 'weight_end.pth')

 Interrupted by the Keyboard
Killing PID 8741
finished frames 0, mean/median reward 0.0/0.0, min/max reward 0.0/0.0
