In [1]:
import os
import csv

# Reads all the episode files in a folder and returns them as a list of
# episodes, each episode being a list of rows (lists of floats).
def read_Episodes(baseFolder):
    """Load every episode CSV stored directly inside ``baseFolder``.

    Directory entries are visited in sorted filename order so the episode
    order is reproducible. Sub-directories (for example the
    ``.ipynb_checkpoints`` folder Jupyter may create) are skipped instead of
    being passed to ``open()``, and blank lines inside a file are ignored so
    they do not turn into empty observations.

    Args:
        baseFolder: Path of the directory that holds one CSV file per episode.

    Returns:
        A list with one entry per file. Each entry is a list of rows and each
        row is a list of floats. A file without any data rows yields an empty
        list, so the result keeps one entry per file.

    Raises:
        OSError: If the folder or one of its files cannot be read.
        ValueError: If a non-blank cell cannot be converted with ``float()``.
    """
    episodes = []
    for filename in sorted(os.listdir(baseFolder)):
        filePath = os.path.join(baseFolder, filename)
        # Only regular files can be parsed; open() on a directory raises.
        if not os.path.isfile(filePath):
            continue
        with open(filePath, "r", newline="") as file:
            episode = [
                [float(value) for value in row]
                for row in csv.reader(file)
                # csv.reader yields [] for empty lines; also drop rows whose
                # cells are all whitespace.
                if any(cell.strip() for cell in row)
            ]
        episodes.append(episode)
    return episodes

In [2]:
import d3rlpy
import numpy as np
import os

# --- Configuration -----------------------------------------------------------
EPIfolder = "../data/episFormula/3. corrected sign"
SEED = 0                   # seed passed to d3rlpy.seed() so runs are comparable
DEVICE = 'cuda:0'          # if you don't use GPU, set DEVICE = None instead
N_TEST_EPISODES = 200      # upper bound on the number of held-out episodes
MAX_TEST_FRACTION = 0.2    # ...but never hold out more than this share of the data
N_STEPS = 200000
N_STEPS_PER_EPOCH = 1000

d3rlpy.seed(SEED)


def episodes_to_arrays(episodes):
    """Flatten episodes into the transition arrays passed to MDPDataset.

    Every row is read as ``[obs_0, ..., obs_k, action, reward]``: all columns
    but the last two form the observation, the second-to-last column is the
    (1-dimensional) action and the last column is the reward. The final row of
    each episode is flagged as terminal.

    Args:
        episodes: Non-empty list of non-empty episodes (lists of float rows).

    Returns:
        Tuple ``(observations, actions, rewards, terminals)`` of numpy arrays
        whose first dimension is the total number of rows.

    Raises:
        ValueError: If an episode is not a rectangular table with at least
            three columns.
    """
    obs_chunks, act_chunks, rew_chunks, term_chunks = [], [], [], []
    for index, epi in enumerate(episodes):
        rows = np.asarray(epi, dtype=np.float64)
        if rows.ndim != 2 or rows.shape[1] < 3:
            raise ValueError(
                f"Episode {index} must be a table with at least 3 columns, "
                f"got array of shape {rows.shape}"
            )
        obs_chunks.append(rows[:, :-2])
        act_chunks.append(rows[:, -2:-1])   # keep a trailing axis of size 1
        rew_chunks.append(rows[:, -1])
        done = np.zeros(len(rows), dtype=np.int64)
        done[-1] = 1                        # only the last step ends the episode
        term_chunks.append(done)
    return (
        np.concatenate(obs_chunks),
        np.concatenate(act_chunks),
        np.concatenate(rew_chunks),
        np.concatenate(term_chunks),
    )


def make_mdp_dataset(observations, actions, rewards, terminals):
    """Wrap transition arrays in a continuous-action d3rlpy MDPDataset."""
    return d3rlpy.dataset.MDPDataset(
        action_space=d3rlpy.constants.ActionSpace.CONTINUOUS,
        observations=observations,
        actions=actions,
        rewards=rewards,
        terminals=terminals,
    )


def make_algo(config_cls):
    """Create an algorithm from ``config_cls`` with the shared scalers on DEVICE."""
    return config_cls(
        action_scaler=action_scaler,
        observation_scaler=observation_scaler,
    ).create(device=DEVICE)


def make_evaluators(episodes):
    """Build a fresh set of evaluators that score a model on ``episodes``."""
    return {
        'td_error': d3rlpy.metrics.TDErrorEvaluator(episodes),
        'value_scale': d3rlpy.metrics.AverageValueEstimationEvaluator(episodes),
        'discounted_advantage': d3rlpy.metrics.DiscountedSumOfAdvantageEvaluator(episodes),
        'initial_state': d3rlpy.metrics.InitialStateValueEstimationEvaluator(episodes),
        'diff_eval': d3rlpy.metrics.ContinuousActionDiffEvaluator(episodes),
    }


# --- Load data ---------------------------------------------------------------
# Drop episodes without rows: they would add a terminal flag that has no
# matching transition and shift every later episode boundary.
epis = [epi for epi in read_Episodes(EPIfolder) if epi]
if len(epis) < 2:
    raise ValueError(
        f"Need at least 2 non-empty episodes in {EPIfolder!r} to build a "
        f"train/test split, found {len(epis)}"
    )

observations, actions, rewards, terminals = episodes_to_arrays(epis)

print(observations.shape)

# Full dataset (all episodes); kept under its original name for inspection.
dataset = make_mdp_dataset(observations, actions, rewards, terminals)
assert len(dataset.episodes) == len(epis), "episode boundaries were not preserved"

# --- Train / test split ------------------------------------------------------
# Hold out the first episodes as test data and train only on the rest, so the
# evaluators below measure performance on episodes the models never saw.
n_test = max(1, min(N_TEST_EPISODES, int(len(epis) * MAX_TEST_FRACTION)))
test_episodes = dataset.episodes[:n_test]
train_dataset = make_mdp_dataset(*episodes_to_arrays(epis[n_test:]))
print("Episodes (train / test): ", len(epis) - n_test, "/", n_test)

# --- Models ------------------------------------------------------------------
# One pair of scaler objects is shared by every algorithm below.
action_scaler = d3rlpy.preprocessing.MinMaxActionScaler()
observation_scaler = d3rlpy.preprocessing.MinMaxObservationScaler()

# combine FileAdapterFactory and TensorboardAdapterFactory
logger_adapter = d3rlpy.logging.CombineAdapterFactory([
   d3rlpy.logging.FileAdapterFactory(root_dir="logs/d3rlpy_logs"),
   d3rlpy.logging.TensorboardAdapterFactory(root_dir="logs/tensorboard_logs"),
])

# NOTE(review): behaviour cloning (d3rlpy.algos.BCConfig) was left disabled in
# the original cell; presumably the value-based evaluators do not apply to it
# - confirm before re-enabling.
# NOTE(review): DDPG, TD3 and SAC are usually described as online algorithms;
# the previously recorded DDPG run showed critic_loss and td_error growing
# every epoch. Verify whether they belong in this offline comparison.
DDPG = make_algo(d3rlpy.algos.DDPGConfig)
TD3 = make_algo(d3rlpy.algos.TD3Config)
SAC = make_algo(d3rlpy.algos.SACConfig)
BCQ = make_algo(d3rlpy.algos.BCQConfig)
BEAR = make_algo(d3rlpy.algos.BEARConfig)
CQL = make_algo(d3rlpy.algos.CQLConfig)
CRR = make_algo(d3rlpy.algos.CRRConfig)
CalQL = make_algo(d3rlpy.algos.CalQLConfig)
AWAC = make_algo(d3rlpy.algos.AWACConfig)
PLAS = make_algo(d3rlpy.algos.PLASConfig)
PLASP = make_algo(d3rlpy.algos.PLASWithPerturbationConfig)
TD3BC = make_algo(d3rlpy.algos.TD3PlusBCConfig)
PRDC = make_algo(d3rlpy.algos.PRDCConfig)
ReBRAC = make_algo(d3rlpy.algos.ReBRACConfig)
IQL = make_algo(d3rlpy.algos.IQLConfig)

Models = [DDPG, TD3, SAC, BCQ, BEAR, CQL, CRR, CalQL, AWAC, PLAS, PLASP, TD3BC, PRDC, ReBRAC, IQL]

# --- Training ----------------------------------------------------------------
# Keep the return value of every fit() call, keyed by algorithm class name,
# instead of overwriting a single variable on each iteration.
results = {}
for rlModel in Models:
    model_name = rlModel.__class__.__name__
    print("Training model: ", model_name)
    result = rlModel.fit(
        train_dataset,
        n_steps=N_STEPS,
        n_steps_per_epoch=N_STEPS_PER_EPOCH,
        evaluators=make_evaluators(test_episodes),
        logger_adapter=logger_adapter,
    )
    results[model_name] = result

Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
Users of this version of Gym should be able to simply replace 'import gym' with 'import gymnasium as gym' in the vast majority of cases.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.
  from .autonotebook import tqdm as notebook_tqdm


(23981, 7)
2025-12-05 23:19.03 [info     ] Signatures have been automatically determined. action_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]) observation_signature=Signature(dtype=[dtype('float64')], shape=[(7,)]) reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)])
2025-12-05 23:19.03 [info     ] Action size has been automatically determined. action_size=1
Training model:  DDPG
2025-12-05 23:19.03 [info     ] dataset info                   dataset_info=DatasetInfo(observation_signature=Signature(dtype=[dtype('float64')], shape=[(7,)]), action_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.CONTINUOUS: 1>, action_size=1)
2025-12-05 23:19.03 [debug    ] Fitting observation scaler...  observation_scaler=min_max
2025-12-05 23:19.03 [debug    ] Fitting action scaler...       action_scaler=min_max
2025-12-05 23:19.03 [debug    ] Building models...          

Epoch 1/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.02it/s, critic_loss=0.236, actor_loss=-2.01]


2025-12-05 23:19.27 [info     ] DDPG_20251205231906: epoch=1 step=1000 epoch=1 metrics={'time_sample_batch': 0.004881596088409424, 'time_algorithm_update': 0.012094255924224853, 'critic_loss': 0.23970390455424787, 'actor_loss': -2.027244101472199, 'time_step': 0.01722714161872864, 'td_error': 1.6770780791805877, 'value_scale': 4.006850385546609, 'discounted_advantage': -7.005441939245865, 'initial_state': 3.9310083389282227, 'diff_eval': 113474.19222585033} step=1000
2025-12-05 23:19.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_1000.d3


Epoch 2/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.78it/s, critic_loss=0.949, actor_loss=-6.37]


2025-12-05 23:19.46 [info     ] DDPG_20251205231906: epoch=2 step=2000 epoch=2 metrics={'time_sample_batch': 0.005006111860275268, 'time_algorithm_update': 0.011146811485290528, 'critic_loss': 0.9524928363859654, 'actor_loss': -6.398542202949524, 'time_step': 0.016410166025161745, 'td_error': 2.373068789188043, 'value_scale': 8.555549411179634, 'discounted_advantage': -11.18883723507753, 'initial_state': 8.348832130432129, 'diff_eval': 113474.30047755789} step=2000
2025-12-05 23:19.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_2000.d3


Epoch 3/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.16it/s, critic_loss=1.46, actor_loss=-12.5]


2025-12-05 23:20.06 [info     ] DDPG_20251205231906: epoch=3 step=3000 epoch=3 metrics={'time_sample_batch': 0.0049090924263000485, 'time_algorithm_update': 0.011146048307418823, 'critic_loss': 1.4557442080974579, 'actor_loss': -12.520259239196777, 'time_step': 0.016302711009979248, 'td_error': 3.060588326731077, 'value_scale': 14.456434610903212, 'discounted_advantage': -20.560873599101548, 'initial_state': 14.108388900756836, 'diff_eval': 113474.30763928908} step=3000
2025-12-05 23:20.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_3000.d3


Epoch 4/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.44it/s, critic_loss=1.7, actor_loss=-19.9]


2025-12-05 23:20.26 [info     ] DDPG_20251205231906: epoch=4 step=4000 epoch=4 metrics={'time_sample_batch': 0.005087752819061279, 'time_algorithm_update': 0.011442465782165527, 'critic_loss': 1.6980558408498765, 'actor_loss': -19.92945237350464, 'time_step': 0.016782832622528075, 'td_error': 4.206384491205328, 'value_scale': 22.08286423175553, 'discounted_advantage': -29.90091814506947, 'initial_state': 20.3523006439209, 'diff_eval': 113474.30842676027} step=4000
2025-12-05 23:20.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_4000.d3


Epoch 5/200: 100%|██████████| 1000/1000 [00:19<00:00, 52.02it/s, critic_loss=2.68, actor_loss=-29.2]


2025-12-05 23:20.49 [info     ] DDPG_20251205231906: epoch=5 step=5000 epoch=5 metrics={'time_sample_batch': 0.005421143770217895, 'time_algorithm_update': 0.013167608737945556, 'critic_loss': 2.6849766627550125, 'actor_loss': -29.26711989402771, 'time_step': 0.01885373067855835, 'td_error': 9.598002658186276, 'value_scale': 33.251182060929175, 'discounted_advantage': -47.19165454735982, 'initial_state': 25.490694046020508, 'diff_eval': 113474.30846605197} step=5000
2025-12-05 23:20.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_5000.d3


Epoch 6/200: 100%|██████████| 1000/1000 [00:16<00:00, 62.20it/s, critic_loss=4.87, actor_loss=-41.9]


2025-12-05 23:21.08 [info     ] DDPG_20251205231906: epoch=6 step=6000 epoch=6 metrics={'time_sample_batch': 0.004736345052719116, 'time_algorithm_update': 0.010703705549240113, 'critic_loss': 4.8826081008911135, 'actor_loss': -41.97580633544922, 'time_step': 0.01572460651397705, 'td_error': 21.44448968208648, 'value_scale': 49.571112133231566, 'discounted_advantage': -75.82164417400286, 'initial_state': 35.024356842041016, 'diff_eval': 113474.30846605197} step=6000
2025-12-05 23:21.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_6000.d3


Epoch 7/200: 100%|██████████| 1000/1000 [00:18<00:00, 54.45it/s, critic_loss=10.1, actor_loss=-58.3]


2025-12-05 23:21.29 [info     ] DDPG_20251205231906: epoch=7 step=7000 epoch=7 metrics={'time_sample_batch': 0.0054540207386016845, 'time_algorithm_update': 0.012349054098129272, 'critic_loss': 10.171327314853668, 'actor_loss': -58.385045177459716, 'time_step': 0.018055219650268555, 'td_error': 54.994536375727236, 'value_scale': 73.51576630887321, 'discounted_advantage': -114.66373182845143, 'initial_state': 45.6922607421875, 'diff_eval': 113474.30846605197} step=7000
2025-12-05 23:21.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_7000.d3


Epoch 8/200: 100%|██████████| 1000/1000 [00:17<00:00, 56.57it/s, critic_loss=22.6, actor_loss=-82.4]


2025-12-05 23:21.50 [info     ] DDPG_20251205231906: epoch=8 step=8000 epoch=8 metrics={'time_sample_batch': 0.005490737915039063, 'time_algorithm_update': 0.011609639883041381, 'critic_loss': 22.74204150104523, 'actor_loss': -82.5016289138794, 'time_step': 0.01735289692878723, 'td_error': 142.49422918500696, 'value_scale': 112.04120549332266, 'discounted_advantage': -173.077770311445, 'initial_state': 67.66693115234375, 'diff_eval': 113474.30846605197} step=8000
2025-12-05 23:21.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_8000.d3


Epoch 9/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.70it/s, critic_loss=61.2, actor_loss=-121]


2025-12-05 23:22.09 [info     ] DDPG_20251205231906: epoch=9 step=9000 epoch=9 metrics={'time_sample_batch': 0.004742084741592407, 'time_algorithm_update': 0.010895596742630005, 'critic_loss': 61.64369706535339, 'actor_loss': -121.17678176879883, 'time_step': 0.015889512062072755, 'td_error': 353.81560940679697, 'value_scale': 169.6264198300025, 'discounted_advantage': -259.17145003959104, 'initial_state': 102.42118835449219, 'diff_eval': 113474.30846605197} step=9000
2025-12-05 23:22.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_9000.d3


Epoch 10/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.87it/s, critic_loss=175, actor_loss=-181]


2025-12-05 23:22.27 [info     ] DDPG_20251205231906: epoch=10 step=10000 epoch=10 metrics={'time_sample_batch': 0.004616904497146607, 'time_algorithm_update': 0.010750845909118652, 'critic_loss': 175.95397213745116, 'actor_loss': -181.192179397583, 'time_step': 0.015607517957687379, 'td_error': 1090.7460940679143, 'value_scale': 260.1023562496086, 'discounted_advantage': -432.8469215104208, 'initial_state': 147.7711944580078, 'diff_eval': 113474.30846605197} step=10000
2025-12-05 23:22.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_10000.d3


Epoch 11/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.62it/s, critic_loss=555, actor_loss=-282]


2025-12-05 23:22.46 [info     ] DDPG_20251205231906: epoch=11 step=11000 epoch=11 metrics={'time_sample_batch': 0.004519534111022949, 'time_algorithm_update': 0.01067518424987793, 'critic_loss': 557.702544052124, 'actor_loss': -282.78995222473145, 'time_step': 0.015436984062194825, 'td_error': 3171.2156271339586, 'value_scale': 414.9081748840895, 'discounted_advantage': -669.9878823962896, 'initial_state': 251.08343505859375, 'diff_eval': 113474.30846605197} step=11000
2025-12-05 23:22.46 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_11000.d3


Epoch 12/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.95it/s, critic_loss=1.85e+3, actor_loss=-474]


2025-12-05 23:23.05 [info     ] DDPG_20251205231906: epoch=12 step=12000 epoch=12 metrics={'time_sample_batch': 0.004577706336975098, 'time_algorithm_update': 0.010769536256790162, 'critic_loss': 1865.5924006347657, 'actor_loss': -475.1445191345215, 'time_step': 0.015582446098327637, 'td_error': 10464.688682383483, 'value_scale': 694.4211064967806, 'discounted_advantage': -1163.7575187568123, 'initial_state': 502.55499267578125, 'diff_eval': 113474.30846605197} step=12000
2025-12-05 23:23.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_12000.d3


Epoch 13/200: 100%|██████████| 1000/1000 [00:16<00:00, 62.02it/s, critic_loss=6.89e+3, actor_loss=-853]


2025-12-05 23:23.24 [info     ] DDPG_20251205231906: epoch=13 step=13000 epoch=13 metrics={'time_sample_batch': 0.004629954814910888, 'time_algorithm_update': 0.010941874980926514, 'critic_loss': 6937.328929199219, 'actor_loss': -855.1202528076171, 'time_step': 0.01582710576057434, 'td_error': 34060.77558242785, 'value_scale': 1193.6478926857678, 'discounted_advantage': -2002.7897327956766, 'initial_state': 976.8909301757812, 'diff_eval': 113474.30846605197} step=13000
2025-12-05 23:23.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_13000.d3


Epoch 14/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.65it/s, critic_loss=2.67e+4, actor_loss=-1.5e+3]


2025-12-05 23:23.45 [info     ] DDPG_20251205231906: epoch=14 step=14000 epoch=14 metrics={'time_sample_batch': 0.005077149629592896, 'time_algorithm_update': 0.011126524925231933, 'critic_loss': 26869.101427734375, 'actor_loss': -1507.0472790527344, 'time_step': 0.01645745015144348, 'td_error': 96607.24140739601, 'value_scale': 1978.937358488481, 'discounted_advantage': -3264.488920268701, 'initial_state': 1695.56201171875, 'diff_eval': 113474.30846605197} step=14000
2025-12-05 23:23.46 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_14000.d3


Epoch 15/200: 100%|██████████| 1000/1000 [00:18<00:00, 53.04it/s, critic_loss=8.87e+4, actor_loss=-2.49e+3]


2025-12-05 23:24.07 [info     ] DDPG_20251205231906: epoch=15 step=15000 epoch=15 metrics={'time_sample_batch': 0.005244499444961548, 'time_algorithm_update': 0.012941229343414306, 'critic_loss': 89360.79145117187, 'actor_loss': -2492.604143432617, 'time_step': 0.01848643207550049, 'td_error': 242599.13633094908, 'value_scale': 3117.9278226790066, 'discounted_advantage': -4948.133044103948, 'initial_state': 2761.73876953125, 'diff_eval': 113474.30846605197} step=15000
2025-12-05 23:24.07 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_15000.d3


Epoch 16/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.51it/s, critic_loss=2.61e+5, actor_loss=-4e+3]  


2025-12-05 23:24.27 [info     ] DDPG_20251205231906: epoch=16 step=16000 epoch=16 metrics={'time_sample_batch': 0.005028791427612304, 'time_algorithm_update': 0.011172343492507935, 'critic_loss': 262456.6522578125, 'actor_loss': -4005.127666748047, 'time_step': 0.01647158885002136, 'td_error': 688023.3245711474, 'value_scale': 5009.812971909708, 'discounted_advantage': -7891.440114302189, 'initial_state': 4580.38525390625, 'diff_eval': 113474.30846605197} step=16000
2025-12-05 23:24.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_16000.d3


Epoch 17/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.75it/s, critic_loss=7.87e+5, actor_loss=-6.48e+3]


2025-12-05 23:24.46 [info     ] DDPG_20251205231906: epoch=17 step=17000 epoch=17 metrics={'time_sample_batch': 0.0046428611278533935, 'time_algorithm_update': 0.010769784688949584, 'critic_loss': 791779.12165625, 'actor_loss': -6491.2106376953125, 'time_step': 0.015657329320907593, 'td_error': 2041933.2597605088, 'value_scale': 8121.292781091196, 'discounted_advantage': -13583.23539407005, 'initial_state': 7643.25390625, 'diff_eval': 113474.30846605197} step=17000
2025-12-05 23:24.46 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_17000.d3


Epoch 18/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.88it/s, critic_loss=2.4e+6, actor_loss=-1.05e+4]


2025-12-05 23:25.05 [info     ] DDPG_20251205231906: epoch=18 step=18000 epoch=18 metrics={'time_sample_batch': 0.004913021326065064, 'time_algorithm_update': 0.01096819543838501, 'critic_loss': 2415606.05875, 'actor_loss': -10516.133088867187, 'time_step': 0.0161268208026886, 'td_error': 5662547.87502865, 'value_scale': 12979.36304836481, 'discounted_advantage': -21848.578627457628, 'initial_state': 12394.0634765625, 'diff_eval': 113474.30846605197} step=18000
2025-12-05 23:25.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_18000.d3


Epoch 19/200: 100%|██████████| 1000/1000 [00:16<00:00, 62.14it/s, critic_loss=6.84e+6, actor_loss=-1.68e+4]


2025-12-05 23:25.24 [info     ] DDPG_20251205231906: epoch=19 step=19000 epoch=19 metrics={'time_sample_batch': 0.0047271127700805665, 'time_algorithm_update': 0.01083655858039856, 'critic_loss': 6876057.59, 'actor_loss': -16813.725509765623, 'time_step': 0.015811507940292357, 'td_error': 14744643.534507118, 'value_scale': 20597.450784688022, 'discounted_advantage': -32637.62537295593, 'initial_state': 20009.314453125, 'diff_eval': 113474.30846605197} step=19000
2025-12-05 23:25.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_19000.d3


Epoch 20/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.11it/s, critic_loss=1.85e+7, actor_loss=-2.63e+4]


2025-12-05 23:25.43 [info     ] DDPG_20251205231906: epoch=20 step=20000 epoch=20 metrics={'time_sample_batch': 0.004796480894088745, 'time_algorithm_update': 0.011011091470718383, 'critic_loss': 18630768.6865, 'actor_loss': -26351.938091796874, 'time_step': 0.01605946445465088, 'td_error': 37311783.23795955, 'value_scale': 31871.57183177913, 'discounted_advantage': -50886.857113391714, 'initial_state': 31513.310546875, 'diff_eval': 113474.30846605197} step=20000
2025-12-05 23:25.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_20000.d3


Epoch 21/200: 100%|██████████| 1000/1000 [00:15<00:00, 64.13it/s, critic_loss=4.88e+7, actor_loss=-4.08e+4]


2025-12-05 23:26.01 [info     ] DDPG_20251205231906: epoch=21 step=21000 epoch=21 metrics={'time_sample_batch': 0.004563264608383179, 'time_algorithm_update': 0.010497959613800048, 'critic_loss': 49073101.908, 'actor_loss': -40839.10733398437, 'time_step': 0.015300198793411254, 'td_error': 94005890.7839733, 'value_scale': 48609.2444877017, 'discounted_advantage': -81895.3118246677, 'initial_state': 48920.66015625, 'diff_eval': 113474.30846605197} step=21000
2025-12-05 23:26.01 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_21000.d3


Epoch 22/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.57it/s, critic_loss=1.19e+8, actor_loss=-6.18e+4]


2025-12-05 23:26.20 [info     ] DDPG_20251205231906: epoch=22 step=22000 epoch=22 metrics={'time_sample_batch': 0.004537788629531861, 'time_algorithm_update': 0.010662560939788818, 'critic_loss': 119144738.976, 'actor_loss': -61958.94170703125, 'time_step': 0.015440114498138428, 'td_error': 213831501.89622408, 'value_scale': 72724.79450931214, 'discounted_advantage': -119036.07503401978, 'initial_state': 74516.7265625, 'diff_eval': 113474.30846605197} step=22000
2025-12-05 23:26.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_22000.d3


Epoch 23/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.88it/s, critic_loss=2.82e+8, actor_loss=-9.17e+4]


2025-12-05 23:26.39 [info     ] DDPG_20251205231906: epoch=23 step=23000 epoch=23 metrics={'time_sample_batch': 0.004872642517089844, 'time_algorithm_update': 0.011006672382354737, 'critic_loss': 282753991.616, 'actor_loss': -91898.123484375, 'time_step': 0.01612303614616394, 'td_error': 454420770.5201108, 'value_scale': 106408.45387154233, 'discounted_advantage': -162511.91020582922, 'initial_state': 110624.8828125, 'diff_eval': 113474.30846605197} step=23000
2025-12-05 23:26.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_23000.d3


Epoch 24/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.69it/s, critic_loss=6.16e+8, actor_loss=-1.33e+5]


2025-12-05 23:26.58 [info     ] DDPG_20251205231906: epoch=24 step=24000 epoch=24 metrics={'time_sample_batch': 0.0046779074668884275, 'time_algorithm_update': 0.010694488763809204, 'critic_loss': 618990512.48, 'actor_loss': -133069.4071015625, 'time_step': 0.01563135743141174, 'td_error': 944406790.0175085, 'value_scale': 151749.34905765404, 'discounted_advantage': -230932.10804589497, 'initial_state': 159853.8125, 'diff_eval': 113474.30846605197} step=24000
2025-12-05 23:26.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_24000.d3


Epoch 25/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.22it/s, critic_loss=1.29e+9, actor_loss=-1.88e+5]


2025-12-05 23:27.17 [info     ] DDPG_20251205231906: epoch=25 step=25000 epoch=25 metrics={'time_sample_batch': 0.0047745051383972165, 'time_algorithm_update': 0.0109702410697937, 'critic_loss': 1297915553.088, 'actor_loss': -188099.372, 'time_step': 0.016013593912124632, 'td_error': 1850709466.0551188, 'value_scale': 211084.16590921, 'discounted_advantage': -320879.9044924545, 'initial_state': 225483.984375, 'diff_eval': 113474.30846605197} step=25000
2025-12-05 23:27.17 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_25000.d3


Epoch 26/200: 100%|██████████| 1000/1000 [00:15<00:00, 64.02it/s, critic_loss=2.54e+9, actor_loss=-2.59e+5]


2025-12-05 23:27.35 [info     ] DDPG_20251205231906: epoch=26 step=26000 epoch=26 metrics={'time_sample_batch': 0.004549583435058594, 'time_algorithm_update': 0.010551217079162598, 'critic_loss': 2547602590.72, 'actor_loss': -259785.991015625, 'time_step': 0.015328145503997803, 'td_error': 3451989338.0906887, 'value_scale': 285989.05180610856, 'discounted_advantage': -440737.79551508697, 'initial_state': 309811.875, 'diff_eval': 113474.30846605197} step=26000
2025-12-05 23:27.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_26000.d3


Epoch 27/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.52it/s, critic_loss=4.87e+9, actor_loss=-3.51e+5]


2025-12-05 23:27.54 [info     ] DDPG_20251205231906: epoch=27 step=27000 epoch=27 metrics={'time_sample_batch': 0.0047694950103759765, 'time_algorithm_update': 0.010907629013061523, 'critic_loss': 4881783233.536, 'actor_loss': -351959.94684375, 'time_step': 0.015934752464294433, 'td_error': 6210705309.459674, 'value_scale': 383077.62505238893, 'discounted_advantage': -580426.4958737734, 'initial_state': 420824.0, 'diff_eval': 113474.30846605197} step=27000
2025-12-05 23:27.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_27000.d3


Epoch 28/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.31it/s, critic_loss=8.77e+9, actor_loss=-4.68e+5]


2025-12-05 23:28.14 [info     ] DDPG_20251205231906: epoch=28 step=28000 epoch=28 metrics={'time_sample_batch': 0.005115369319915772, 'time_algorithm_update': 0.01145243239402771, 'critic_loss': 8792456453.12, 'actor_loss': -469079.98271875, 'time_step': 0.016816556215286254, 'td_error': 10881295978.5866, 'value_scale': 503710.413977368, 'discounted_advantage': -770525.0944445389, 'initial_state': 559591.0625, 'diff_eval': 113474.30846605197} step=28000
2025-12-05 23:28.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_28000.d3


Epoch 29/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.64it/s, critic_loss=1.54e+10, actor_loss=-6.14e+5]


2025-12-05 23:28.33 [info     ] DDPG_20251205231906: epoch=29 step=29000 epoch=29 metrics={'time_sample_batch': 0.004557753801345825, 'time_algorithm_update': 0.010638379573822021, 'critic_loss': 15422058205.184, 'actor_loss': -615203.5738125, 'time_step': 0.015432782888412476, 'td_error': 18259537726.39289, 'value_scale': 652945.5734361903, 'discounted_advantage': -977718.726320666, 'initial_state': 733251.6875, 'diff_eval': 113474.30846605197} step=29000
2025-12-05 23:28.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_29000.d3


Epoch 30/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.53it/s, critic_loss=2.64e+10, actor_loss=-7.93e+5]


2025-12-05 23:28.51 [info     ] DDPG_20251205231906: epoch=30 step=30000 epoch=30 metrics={'time_sample_batch': 0.004557680606842041, 'time_algorithm_update': 0.01066939902305603, 'critic_loss': 26457104023.552, 'actor_loss': -793605.4031875, 'time_step': 0.015456312656402588, 'td_error': 30085321873.47831, 'value_scale': 834361.2878379086, 'discounted_advantage': -1261654.088785704, 'initial_state': 947715.5, 'diff_eval': 113474.30846605197} step=30000
2025-12-05 23:28.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_30000.d3


Epoch 31/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.04it/s, critic_loss=4.3e+10, actor_loss=-1.01e+6]


2025-12-05 23:29.10 [info     ] DDPG_20251205231906: epoch=31 step=31000 epoch=31 metrics={'time_sample_batch': 0.004605009555816651, 'time_algorithm_update': 0.010739500761032104, 'critic_loss': 43180181161.984, 'actor_loss': -1011660.6620625, 'time_step': 0.015582934856414795, 'td_error': 47934518122.04915, 'value_scale': 1058806.0501886, 'discounted_advantage': -1541900.192221064, 'initial_state': 1214315.25, 'diff_eval': 113474.30846605197} step=31000
2025-12-05 23:29.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_31000.d3


Epoch 32/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.42it/s, critic_loss=6.94e+10, actor_loss=-1.27e+6]


2025-12-05 23:29.29 [info     ] DDPG_20251205231906: epoch=32 step=32000 epoch=32 metrics={'time_sample_batch': 0.004614808559417725, 'time_algorithm_update': 0.01061663818359375, 'critic_loss': 69480482172.928, 'actor_loss': -1272224.298625, 'time_step': 0.015469686269760132, 'td_error': 74695673315.72992, 'value_scale': 1319337.988291073, 'discounted_advantage': -1906189.6675718825, 'initial_state': 1525076.5, 'diff_eval': 113474.30846605197} step=32000
2025-12-05 23:29.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_32000.d3


Epoch 33/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.88it/s, critic_loss=1.09e+11, actor_loss=-1.58e+6]


2025-12-05 23:29.48 [info     ] DDPG_20251205231906: epoch=33 step=33000 epoch=33 metrics={'time_sample_batch': 0.004735206127166748, 'time_algorithm_update': 0.010893637657165528, 'critic_loss': 109017245704.192, 'actor_loss': -1578908.194625, 'time_step': 0.015868787050247194, 'td_error': 114179325738.91074, 'value_scale': 1628486.2300398157, 'discounted_advantage': -2337802.7873576726, 'initial_state': 1896996.125, 'diff_eval': 113474.30846605197} step=33000
2025-12-05 23:29.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_33000.d3


Epoch 34/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.31it/s, critic_loss=1.65e+11, actor_loss=-1.93e+6]


2025-12-05 23:30.06 [info     ] DDPG_20251205231906: epoch=34 step=34000 epoch=34 metrics={'time_sample_batch': 0.0046449432373046875, 'time_algorithm_update': 0.01064213252067566, 'critic_loss': 164880719241.216, 'actor_loss': -1935956.990375, 'time_step': 0.01551083517074585, 'td_error': 169621997581.84903, 'value_scale': 1987429.1727525147, 'discounted_advantage': -2794649.5387994624, 'initial_state': 2333058.25, 'diff_eval': 113474.30846605197} step=34000
2025-12-05 23:30.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_34000.d3


Epoch 35/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.51it/s, critic_loss=2.43e+11, actor_loss=-2.35e+6]


2025-12-05 23:30.25 [info     ] DDPG_20251205231906: epoch=35 step=35000 epoch=35 metrics={'time_sample_batch': 0.004538865089416504, 'time_algorithm_update': 0.010700791597366333, 'critic_loss': 243808814350.336, 'actor_loss': -2347374.699, 'time_step': 0.015458155870437622, 'td_error': 246958712355.07703, 'value_scale': 2391153.915601425, 'discounted_advantage': -3387881.6495340867, 'initial_state': 2822758.75, 'diff_eval': 113474.30846605197} step=35000
2025-12-05 23:30.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_35000.d3


Epoch 36/200: 100%|██████████| 1000/1000 [00:16<00:00, 62.11it/s, critic_loss=3.59e+11, actor_loss=-2.81e+6]


2025-12-05 23:30.44 [info     ] DDPG_20251205231906: epoch=36 step=36000 epoch=36 metrics={'time_sample_batch': 0.0047048327922821044, 'time_algorithm_update': 0.010865171670913696, 'critic_loss': 359348399439.872, 'actor_loss': -2816452.807, 'time_step': 0.015802068710327147, 'td_error': 354917213811.3051, 'value_scale': 2862804.359440486, 'discounted_advantage': -4040415.3580949437, 'initial_state': 3400470.5, 'diff_eval': 113474.30846605197} step=36000
2025-12-05 23:30.44 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_36000.d3


Epoch 37/200: 100%|██████████| 1000/1000 [00:16<00:00, 62.49it/s, critic_loss=5.11e+11, actor_loss=-3.35e+6]


2025-12-05 23:31.02 [info     ] DDPG_20251205231906: epoch=37 step=37000 epoch=37 metrics={'time_sample_batch': 0.004651332378387451, 'time_algorithm_update': 0.010815746068954468, 'critic_loss': 512303968321.536, 'actor_loss': -3355072.769, 'time_step': 0.01571410655975342, 'td_error': 499657645353.3664, 'value_scale': 3389199.0783738475, 'discounted_advantage': -4812096.468694861, 'initial_state': 4048102.75, 'diff_eval': 113474.30846605197} step=37000
2025-12-05 23:31.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_37000.d3


Epoch 38/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.02it/s, critic_loss=7.14e+11, actor_loss=-3.96e+6]


2025-12-05 23:31.21 [info     ] DDPG_20251205231906: epoch=38 step=38000 epoch=38 metrics={'time_sample_batch': 0.004621804475784302, 'time_algorithm_update': 0.010731343030929565, 'critic_loss': 714615766646.784, 'actor_loss': -3961182.974, 'time_step': 0.015581546545028687, 'td_error': 692102425951.8223, 'value_scale': 3995284.116565381, 'discounted_advantage': -5580562.6143335, 'initial_state': 4793911.5, 'diff_eval': 113474.30846605197} step=38000
2025-12-05 23:31.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_38000.d3


Epoch 39/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.33it/s, critic_loss=9.92e+11, actor_loss=-4.64e+6]


2025-12-05 23:31.40 [info     ] DDPG_20251205231906: epoch=39 step=39000 epoch=39 metrics={'time_sample_batch': 0.004571876525878906, 'time_algorithm_update': 0.010699018716812133, 'critic_loss': 992759220142.08, 'actor_loss': -4646681.764, 'time_step': 0.01551135230064392, 'td_error': 949880643430.4717, 'value_scale': 4666885.030123638, 'discounted_advantage': -6581964.2198538305, 'initial_state': 5622869.0, 'diff_eval': 113474.30846605197} step=39000
2025-12-05 23:31.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_39000.d3


Epoch 40/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.23it/s, critic_loss=1.36e+12, actor_loss=-5.4e+6]


2025-12-05 23:31.58 [info     ] DDPG_20251205231906: epoch=40 step=40000 epoch=40 metrics={'time_sample_batch': 0.004639979124069214, 'time_algorithm_update': 0.01064664626121521, 'critic_loss': 1359575147675.648, 'actor_loss': -5408363.4025, 'time_step': 0.015527047634124757, 'td_error': 1281113356023.9785, 'value_scale': 5420378.961022632, 'discounted_advantage': -7591857.806880574, 'initial_state': 6557899.5, 'diff_eval': 113474.30846605197} step=40000
2025-12-05 23:31.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_40000.d3


Epoch 41/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.82it/s, critic_loss=1.83e+12, actor_loss=-6.25e+6]


2025-12-05 23:32.17 [info     ] DDPG_20251205231906: epoch=41 step=41000 epoch=41 metrics={'time_sample_batch': 0.004778107166290283, 'time_algorithm_update': 0.010844562530517578, 'critic_loss': 1830699737743.36, 'actor_loss': -6257042.87, 'time_step': 0.01587439179420471, 'td_error': 1713249732491.9773, 'value_scale': 6259638.637259011, 'discounted_advantage': -8789623.39940406, 'initial_state': 7604585.5, 'diff_eval': 113474.30846605197} step=41000
2025-12-05 23:32.17 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_41000.d3


Epoch 42/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.38it/s, critic_loss=2.43e+12, actor_loss=-7.19e+6]


2025-12-05 23:32.36 [info     ] DDPG_20251205231906: epoch=42 step=42000 epoch=42 metrics={'time_sample_batch': 0.0048297691345214845, 'time_algorithm_update': 0.01093151021003723, 'critic_loss': 2428515565830.144, 'actor_loss': -7194064.655, 'time_step': 0.015994505882263185, 'td_error': 2248227613067.132, 'value_scale': 7177749.76969824, 'discounted_advantage': -9932596.041203246, 'initial_state': 8753331.0, 'diff_eval': 113474.30846605197} step=42000
2025-12-05 23:32.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_42000.d3


Epoch 43/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.85it/s, critic_loss=3.17e+12, actor_loss=-8.21e+6]


2025-12-05 23:32.56 [info     ] DDPG_20251205231906: epoch=43 step=43000 epoch=43 metrics={'time_sample_batch': 0.004842535972595215, 'time_algorithm_update': 0.011037933111190796, 'critic_loss': 3173443513221.12, 'actor_loss': -8219523.5555, 'time_step': 0.016134773969650268, 'td_error': 2922234268918.367, 'value_scale': 8183566.8751047775, 'discounted_advantage': -11221916.838925086, 'initial_state': 10008963.0, 'diff_eval': 113474.30846605197} step=43000
2025-12-05 23:32.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_43000.d3


Epoch 44/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.98it/s, critic_loss=4.14e+12, actor_loss=-9.33e+6]


2025-12-05 23:33.14 [info     ] DDPG_20251205231906: epoch=44 step=44000 epoch=44 metrics={'time_sample_batch': 0.004590370655059814, 'time_algorithm_update': 0.010760159254074097, 'critic_loss': 4144720739041.28, 'actor_loss': -9332755.754, 'time_step': 0.015596230268478393, 'td_error': 3773867230661.8633, 'value_scale': 9286178.210184408, 'discounted_advantage': -12774336.041059073, 'initial_state': 11388047.0, 'diff_eval': 113474.30846605197} step=44000
2025-12-05 23:33.15 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_44000.d3


Epoch 45/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.58it/s, critic_loss=5.26e+12, actor_loss=-1.06e+7]


2025-12-05 23:33.33 [info     ] DDPG_20251205231906: epoch=45 step=45000 epoch=45 metrics={'time_sample_batch': 0.004739699125289917, 'time_algorithm_update': 0.010939087390899658, 'critic_loss': 5264055779983.36, 'actor_loss': -10559893.888, 'time_step': 0.015919933795928955, 'td_error': 4810353436229.782, 'value_scale': 10497106.080364628, 'discounted_advantage': -14295556.48628548, 'initial_state': 12913106.0, 'diff_eval': 113474.30846605197} step=45000
2025-12-05 23:33.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_45000.d3


Epoch 46/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.09it/s, critic_loss=6.75e+12, actor_loss=-1.19e+7]


2025-12-05 23:33.53 [info     ] DDPG_20251205231906: epoch=46 step=46000 epoch=46 metrics={'time_sample_batch': 0.004769749879837036, 'time_algorithm_update': 0.011241839170455932, 'critic_loss': 6756404764868.608, 'actor_loss': -11899135.838, 'time_step': 0.016343748092651367, 'td_error': 6109411522668.161, 'value_scale': 11802345.74559933, 'discounted_advantage': -16267915.38007461, 'initial_state': 14558292.0, 'diff_eval': 113474.30846605197} step=46000
2025-12-05 23:33.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_46000.d3


Epoch 47/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.35it/s, critic_loss=8.51e+12, actor_loss=-1.33e+7]


2025-12-05 23:34.12 [info     ] DDPG_20251205231906: epoch=47 step=47000 epoch=47 metrics={'time_sample_batch': 0.004753166913986206, 'time_algorithm_update': 0.011007914543151856, 'critic_loss': 8535949234929.664, 'actor_loss': -13352426.875, 'time_step': 0.015998865842819215, 'td_error': 7663052456614.025, 'value_scale': 13225598.1003772, 'discounted_advantage': -18082912.864661384, 'initial_state': 16357921.0, 'diff_eval': 113474.30846605197} step=47000
2025-12-05 23:34.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_47000.d3


Epoch 48/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.89it/s, critic_loss=1.07e+13, actor_loss=-1.49e+7]


2025-12-05 23:34.31 [info     ] DDPG_20251205231906: epoch=48 step=48000 epoch=48 metrics={'time_sample_batch': 0.0047795729637146, 'time_algorithm_update': 0.010847588062286377, 'critic_loss': 10666925077561.344, 'actor_loss': -14915985.485, 'time_step': 0.015858334302902222, 'td_error': 9530480554254.691, 'value_scale': 14758902.82334451, 'discounted_advantage': -19970331.36704289, 'initial_state': 18298938.0, 'diff_eval': 113474.30846605197} step=48000
2025-12-05 23:34.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_48000.d3


Epoch 49/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.46it/s, critic_loss=1.34e+13, actor_loss=-1.66e+7]


2025-12-05 23:34.51 [info     ] DDPG_20251205231906: epoch=49 step=49000 epoch=49 metrics={'time_sample_batch': 0.005022865056991577, 'time_algorithm_update': 0.011204634189605713, 'critic_loss': 13392019071696.896, 'actor_loss': -16597250.994, 'time_step': 0.016494098663330077, 'td_error': 11825390981875.379, 'value_scale': 16400257.102682313, 'discounted_advantage': -22483237.241513327, 'initial_state': 20381074.0, 'diff_eval': 113474.30846605197} step=49000
2025-12-05 23:34.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_49000.d3


Epoch 50/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.05it/s, critic_loss=1.63e+13, actor_loss=-1.84e+7]


2025-12-05 23:35.09 [info     ] DDPG_20251205231906: epoch=50 step=50000 epoch=50 metrics={'time_sample_batch': 0.0046307845115661625, 'time_algorithm_update': 0.010706836462020874, 'critic_loss': 16307078450642.943, 'actor_loss': -18438385.024, 'time_step': 0.015572856664657592, 'td_error': 14572462182949.834, 'value_scale': 18214371.193839062, 'discounted_advantage': -24729255.84681024, 'initial_state': 22679618.0, 'diff_eval': 113474.30846605197} step=50000
2025-12-05 23:35.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_50000.d3


Epoch 51/200: 100%|██████████| 1000/1000 [00:16<00:00, 62.36it/s, critic_loss=2.01e+13, actor_loss=-2.04e+7]


2025-12-05 23:35.28 [info     ] DDPG_20251205231906: epoch=51 step=51000 epoch=51 metrics={'time_sample_batch': 0.0046873059272766115, 'time_algorithm_update': 0.010803206443786621, 'critic_loss': 20126224927948.8, 'actor_loss': -20422994.406, 'time_step': 0.015726655006408692, 'td_error': 17828259016308.953, 'value_scale': 20140964.941743504, 'discounted_advantage': -27351647.399511248, 'initial_state': 25127810.0, 'diff_eval': 113474.30846605197} step=51000
2025-12-05 23:35.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_51000.d3


Epoch 52/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.34it/s, critic_loss=2.46e+13, actor_loss=-2.25e+7]


2025-12-05 23:35.47 [info     ] DDPG_20251205231906: epoch=52 step=52000 epoch=52 metrics={'time_sample_batch': 0.004592335462570191, 'time_algorithm_update': 0.010678571462631226, 'critic_loss': 24653997203783.68, 'actor_loss': -22543365.42, 'time_step': 0.015503774166107178, 'td_error': 21698696564383.008, 'value_scale': 22212209.393126573, 'discounted_advantage': -30119063.324109163, 'initial_state': 27754810.0, 'diff_eval': 113474.30846605197} step=52000
2025-12-05 23:35.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_52000.d3


Epoch 53/200: 100%|██████████| 1000/1000 [00:16<00:00, 62.33it/s, critic_loss=2.97e+13, actor_loss=-2.48e+7]


2025-12-05 23:36.06 [info     ] DDPG_20251205231906: epoch=53 step=53000 epoch=53 metrics={'time_sample_batch': 0.004692867279052735, 'time_algorithm_update': 0.010816091537475587, 'critic_loss': 29766076092907.52, 'actor_loss': -24833536.804, 'time_step': 0.015742497205734254, 'td_error': 26231305714843.727, 'value_scale': 24438813.48512154, 'discounted_advantage': -32773035.317805786, 'initial_state': 30598510.0, 'diff_eval': 113474.30846605197} step=53000
2025-12-05 23:36.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_53000.d3


Epoch 54/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.09it/s, critic_loss=3.63e+13, actor_loss=-2.72e+7]


2025-12-05 23:36.24 [info     ] DDPG_20251205231906: epoch=54 step=54000 epoch=54 metrics={'time_sample_batch': 0.004610424757003784, 'time_algorithm_update': 0.010735370874404907, 'critic_loss': 36369753691914.24, 'actor_loss': -27236279.886, 'time_step': 0.015574740171432495, 'td_error': 31562500786296.53, 'value_scale': 26773813.041072924, 'discounted_advantage': -36174583.711876616, 'initial_state': 33578704.0, 'diff_eval': 113474.30846605197} step=54000
2025-12-05 23:36.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_54000.d3


Epoch 55/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.62it/s, critic_loss=4.35e+13, actor_loss=-2.98e+7]


2025-12-05 23:36.43 [info     ] DDPG_20251205231906: epoch=55 step=55000 epoch=55 metrics={'time_sample_batch': 0.004634008884429931, 'time_algorithm_update': 0.010792454957962036, 'critic_loss': 43509530755072.0, 'actor_loss': -29825226.086, 'time_step': 0.01566972756385803, 'td_error': 37786629789783.36, 'value_scale': 29278387.24056999, 'discounted_advantage': -39608781.59700287, 'initial_state': 36773652.0, 'diff_eval': 113474.30846605197} step=55000
2025-12-05 23:36.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_55000.d3


Epoch 56/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.51it/s, critic_loss=5.22e+13, actor_loss=-3.26e+7]


2025-12-05 23:37.02 [info     ] DDPG_20251205231906: epoch=56 step=56000 epoch=56 metrics={'time_sample_batch': 0.004664803266525268, 'time_algorithm_update': 0.010799786567687989, 'critic_loss': 52265342086414.336, 'actor_loss': -32571344.414, 'time_step': 0.015709301948547364, 'td_error': 45098609100924.586, 'value_scale': 31977243.16303437, 'discounted_advantage': -43147841.39449372, 'initial_state': 40227472.0, 'diff_eval': 113474.30846605197} step=56000
2025-12-05 23:37.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_56000.d3


Epoch 57/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.66it/s, critic_loss=6.24e+13, actor_loss=-3.55e+7]


2025-12-05 23:37.20 [info     ] DDPG_20251205231906: epoch=57 step=57000 epoch=57 metrics={'time_sample_batch': 0.0045050415992736815, 'time_algorithm_update': 0.010677791357040405, 'critic_loss': 62460493593313.28, 'actor_loss': -35507118.928, 'time_step': 0.015418055057525635, 'td_error': 53541726071340.46, 'value_scale': 34815827.994970664, 'discounted_advantage': -47137638.39898462, 'initial_state': 43880480.0, 'diff_eval': 113474.30846605197} step=57000
2025-12-05 23:37.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_57000.d3


Epoch 58/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.67it/s, critic_loss=7.37e+13, actor_loss=-3.86e+7]


2025-12-05 23:37.39 [info     ] DDPG_20251205231906: epoch=58 step=58000 epoch=58 metrics={'time_sample_batch': 0.004645737409591674, 'time_algorithm_update': 0.010771610260009766, 'critic_loss': 73791846586777.6, 'actor_loss': -38610867.444, 'time_step': 0.015661863565444946, 'td_error': 63171314727725.17, 'value_scale': 37803785.59765298, 'discounted_advantage': -51165019.74215588, 'initial_state': 47713684.0, 'diff_eval': 113474.30846605197} step=58000
2025-12-05 23:37.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_58000.d3


Epoch 59/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.82it/s, critic_loss=8.63e+13, actor_loss=-4.19e+7]


2025-12-05 23:37.58 [info     ] DDPG_20251205231906: epoch=59 step=59000 epoch=59 metrics={'time_sample_batch': 0.004654911756515503, 'time_algorithm_update': 0.010713610887527467, 'critic_loss': 86241959944912.89, 'actor_loss': -41900363.672, 'time_step': 0.015621422529220582, 'td_error': 74207136163195.4, 'value_scale': 40998986.24643755, 'discounted_advantage': -54961386.99642627, 'initial_state': 51811536.0, 'diff_eval': 113474.30846605197} step=59000
2025-12-05 23:37.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_59000.d3


Epoch 60/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.06it/s, critic_loss=1.02e+14, actor_loss=-4.54e+7]


2025-12-05 23:38.18 [info     ] DDPG_20251205231906: epoch=60 step=60000 epoch=60 metrics={'time_sample_batch': 0.004960621356964111, 'time_algorithm_update': 0.011401867151260375, 'critic_loss': 101744853485879.3, 'actor_loss': -45371088.432, 'time_step': 0.016604539155960083, 'td_error': 86794856441195.86, 'value_scale': 44358161.41953059, 'discounted_advantage': -59156030.078116894, 'initial_state': 56147560.0, 'diff_eval': 113474.30846605197} step=60000
2025-12-05 23:38.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_60000.d3


Epoch 61/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.99it/s, critic_loss=1.19e+14, actor_loss=-4.9e+7]


2025-12-05 23:38.36 [info     ] DDPG_20251205231906: epoch=61 step=61000 epoch=61 metrics={'time_sample_batch': 0.00453121280670166, 'time_algorithm_update': 0.010591018438339234, 'critic_loss': 118996616257470.47, 'actor_loss': -49020658.384, 'time_step': 0.015353092670440674, 'td_error': 101501042138290.31, 'value_scale': 47896177.57082984, 'discounted_advantage': -64509849.88003687, 'initial_state': 60690308.0, 'diff_eval': 113474.30846605197} step=61000
2025-12-05 23:38.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_61000.d3


Epoch 62/200: 100%|██████████| 1000/1000 [00:15<00:00, 64.48it/s, critic_loss=1.39e+14, actor_loss=-5.29e+7]


2025-12-05 23:38.54 [info     ] DDPG_20251205231906: epoch=62 step=62000 epoch=62 metrics={'time_sample_batch': 0.004521836042404175, 'time_algorithm_update': 0.01046346616744995, 'critic_loss': 139309323456086.02, 'actor_loss': -52918786.88, 'time_step': 0.015221255779266358, 'td_error': 118394883967263.55, 'value_scale': 51760628.33361274, 'discounted_advantage': -69015843.74891454, 'initial_state': 65673696.0, 'diff_eval': 113474.30846605197} step=62000
2025-12-05 23:38.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_62000.d3


Epoch 63/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.66it/s, critic_loss=1.6e+14, actor_loss=-5.7e+7] 


2025-12-05 23:39.13 [info     ] DDPG_20251205231906: epoch=63 step=63000 epoch=63 metrics={'time_sample_batch': 0.004763296365737915, 'time_algorithm_update': 0.010891176223754883, 'critic_loss': 160477865257730.06, 'actor_loss': -57067248.776, 'time_step': 0.015907013416290285, 'td_error': 137097047454184.83, 'value_scale': 55667614.95892707, 'discounted_advantage': -74442991.4833816, 'initial_state': 70710224.0, 'diff_eval': 113474.30846605197} step=63000
2025-12-05 23:39.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_63000.d3


Epoch 64/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.38it/s, critic_loss=1.87e+14, actor_loss=-6.14e+7]


2025-12-05 23:39.34 [info     ] DDPG_20251205231906: epoch=64 step=64000 epoch=64 metrics={'time_sample_batch': 0.0051980667114257815, 'time_algorithm_update': 0.011578614234924317, 'critic_loss': 187030022366691.3, 'actor_loss': -61379198.168, 'time_step': 0.01706612467765808, 'td_error': 158641438789584.3, 'value_scale': 59875793.33403185, 'discounted_advantage': -79842612.32080713, 'initial_state': 76139952.0, 'diff_eval': 113474.30846605197} step=64000
2025-12-05 23:39.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_64000.d3


Epoch 65/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.51it/s, critic_loss=2.15e+14, actor_loss=-6.59e+7]


2025-12-05 23:39.53 [info     ] DDPG_20251205231906: epoch=65 step=65000 epoch=65 metrics={'time_sample_batch': 0.004663383007049561, 'time_algorithm_update': 0.010803138732910156, 'critic_loss': 215356992946962.44, 'actor_loss': -65922279.708, 'time_step': 0.01570044541358948, 'td_error': 182779067822866.72, 'value_scale': 64298721.70662196, 'discounted_advantage': -85023665.8683074, 'initial_state': 81879960.0, 'diff_eval': 113474.30846605197} step=65000
2025-12-05 23:39.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_65000.d3


Epoch 66/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.82it/s, critic_loss=2.5e+14, actor_loss=-7.07e+7]


2025-12-05 23:40.11 [info     ] DDPG_20251205231906: epoch=66 step=66000 epoch=66 metrics={'time_sample_batch': 0.0046765775680541995, 'time_algorithm_update': 0.010711174726486207, 'critic_loss': 250524107588763.66, 'actor_loss': -70686546.552, 'time_step': 0.015628434419631958, 'td_error': 210101053974277.3, 'value_scale': 68802421.38139145, 'discounted_advantage': -92298075.87954773, 'initial_state': 87701296.0, 'diff_eval': 113474.30846605197} step=66000
2025-12-05 23:40.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_66000.d3


Epoch 67/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.93it/s, critic_loss=2.84e+14, actor_loss=-7.57e+7]


2025-12-05 23:40.30 [info     ] DDPG_20251205231906: epoch=67 step=67000 epoch=67 metrics={'time_sample_batch': 0.004629862308502197, 'time_algorithm_update': 0.010730334281921387, 'critic_loss': 283556243103023.1, 'actor_loss': -75738782.84, 'time_step': 0.015601610660552979, 'td_error': 240712959128271.97, 'value_scale': 73667714.78373848, 'discounted_advantage': -98254775.79662374, 'initial_state': 94003320.0, 'diff_eval': 113474.30846605197} step=67000
2025-12-05 23:40.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_67000.d3


Epoch 68/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.43it/s, critic_loss=3.28e+14, actor_loss=-8.09e+7]


2025-12-05 23:40.51 [info     ] DDPG_20251205231906: epoch=68 step=68000 epoch=68 metrics={'time_sample_batch': 0.005160620450973511, 'time_algorithm_update': 0.011654142141342163, 'critic_loss': 328357177628557.3, 'actor_loss': -80923260.088, 'time_step': 0.017078009128570555, 'td_error': 275114182269474.72, 'value_scale': 78734124.34534787, 'discounted_advantage': -104858563.96547262, 'initial_state': 100556168.0, 'diff_eval': 113474.30846605197} step=68000
2025-12-05 23:40.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_68000.d3


Epoch 69/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.74it/s, critic_loss=3.77e+14, actor_loss=-8.64e+7]


2025-12-05 23:41.09 [info     ] DDPG_20251205231906: epoch=69 step=69000 epoch=69 metrics={'time_sample_batch': 0.004673149824142456, 'time_algorithm_update': 0.010731951236724853, 'critic_loss': 377563275946098.7, 'actor_loss': -86409791.16, 'time_step': 0.015642892837524413, 'td_error': 313964226816609.0, 'value_scale': 84031636.21123219, 'discounted_advantage': -112317187.49120902, 'initial_state': 107404608.0, 'diff_eval': 113474.30846605197} step=69000
2025-12-05 23:41.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_69000.d3


Epoch 70/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.77it/s, critic_loss=4.26e+14, actor_loss=-9.21e+7]


2025-12-05 23:41.28 [info     ] DDPG_20251205231906: epoch=70 step=70000 epoch=70 metrics={'time_sample_batch': 0.0046945023536682125, 'time_algorithm_update': 0.010971978902816773, 'critic_loss': 425671323847491.56, 'actor_loss': -92160235.536, 'time_step': 0.015906502962112427, 'td_error': 356306122678769.4, 'value_scale': 89580744.8281643, 'discounted_advantage': -118568922.3897403, 'initial_state': 114579192.0, 'diff_eval': 113474.30846605197} step=70000
2025-12-05 23:41.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_70000.d3


Epoch 71/200: 100%|██████████| 1000/1000 [00:17<00:00, 56.19it/s, critic_loss=4.83e+14, actor_loss=-9.81e+7]


2025-12-05 23:41.49 [info     ] DDPG_20251205231906: epoch=71 step=71000 epoch=71 metrics={'time_sample_batch': 0.00533656644821167, 'time_algorithm_update': 0.011814863920211792, 'critic_loss': 483814291498074.1, 'actor_loss': -98124330.08, 'time_step': 0.01742367362976074, 'td_error': 403435781595810.0, 'value_scale': 95314582.53227158, 'discounted_advantage': -125769326.63730372, 'initial_state': 122050440.0, 'diff_eval': 113474.30846605197} step=71000
2025-12-05 23:41.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_71000.d3


Epoch 72/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.66it/s, critic_loss=5.47e+14, actor_loss=-1.04e+8]


2025-12-05 23:42.09 [info     ] DDPG_20251205231906: epoch=72 step=72000 epoch=72 metrics={'time_sample_batch': 0.004824564218521118, 'time_algorithm_update': 0.011379347562789917, 'critic_loss': 546895446971252.75, 'actor_loss': -104331594.64, 'time_step': 0.016461648225784303, 'td_error': 455791160874990.0, 'value_scale': 101194290.78625314, 'discounted_advantage': -134753311.5461924, 'initial_state': 129713288.0, 'diff_eval': 113474.30846605197} step=72000
2025-12-05 23:42.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_72000.d3


Epoch 73/200: 100%|██████████| 1000/1000 [00:17<00:00, 56.80it/s, critic_loss=6.18e+14, actor_loss=-1.11e+8]


2025-12-05 23:42.30 [info     ] DDPG_20251205231906: epoch=73 step=73000 epoch=73 metrics={'time_sample_batch': 0.004938921213150024, 'time_algorithm_update': 0.012101240396499634, 'critic_loss': 617753721857638.4, 'actor_loss': -110793802.536, 'time_step': 0.01728847336769104, 'td_error': 515074386400518.1, 'value_scale': 107517720.17602682, 'discounted_advantage': -143313259.41850978, 'initial_state': 137930960.0, 'diff_eval': 113474.30846605197} step=73000
2025-12-05 23:42.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_73000.d3


Epoch 74/200: 100%|██████████| 1000/1000 [00:19<00:00, 52.26it/s, critic_loss=7.01e+14, actor_loss=-1.18e+8]


2025-12-05 23:42.52 [info     ] DDPG_20251205231906: epoch=74 step=74000 epoch=74 metrics={'time_sample_batch': 0.0048402795791625975, 'time_algorithm_update': 0.013738410472869873, 'critic_loss': 700705641169158.1, 'actor_loss': -117750991.912, 'time_step': 0.018821486234664918, 'td_error': 581857404925225.2, 'value_scale': 114277704.18440905, 'discounted_advantage': -151748002.20710984, 'initial_state': 146731904.0, 'diff_eval': 113474.30846605197} step=74000
2025-12-05 23:42.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_74000.d3


Epoch 75/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.13it/s, critic_loss=7.88e+14, actor_loss=-1.25e+8]


2025-12-05 23:43.12 [info     ] DDPG_20251205231906: epoch=75 step=75000 epoch=75 metrics={'time_sample_batch': 0.004660715103149414, 'time_algorithm_update': 0.012306865930557251, 'critic_loss': 788006335026298.9, 'actor_loss': -124974294.016, 'time_step': 0.017205134153366088, 'td_error': 654250738583028.0, 'value_scale': 121230728.49874267, 'discounted_advantage': -159917896.45816308, 'initial_state': 155809328.0, 'diff_eval': 113474.30846605197} step=75000
2025-12-05 23:43.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_75000.d3


Epoch 76/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.98it/s, critic_loss=8.89e+14, actor_loss=-1.32e+8]


2025-12-05 23:43.31 [info     ] DDPG_20251205231906: epoch=76 step=76000 epoch=76 metrics={'time_sample_batch': 0.004952091693878174, 'time_algorithm_update': 0.01093021321296692, 'critic_loss': 889984151367712.8, 'actor_loss': -132448487.72, 'time_step': 0.01611876678466797, 'td_error': 733832814448895.1, 'value_scale': 128246702.15926236, 'discounted_advantage': -170335342.2140901, 'initial_state': 164913024.0, 'diff_eval': 113474.30846605197} step=76000
2025-12-05 23:43.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_76000.d3


Epoch 77/200: 100%|██████████| 1000/1000 [00:17<00:00, 56.50it/s, critic_loss=9.85e+14, actor_loss=-1.4e+8]


2025-12-05 23:43.52 [info     ] DDPG_20251205231906: epoch=77 step=77000 epoch=77 metrics={'time_sample_batch': 0.004925089836120606, 'time_algorithm_update': 0.012172935962677002, 'critic_loss': 985263808370966.5, 'actor_loss': -140150569.664, 'time_step': 0.017358862400054933, 'td_error': 821733512165752.9, 'value_scale': 135764792.53059512, 'discounted_advantage': -179094175.50276393, 'initial_state': 174759280.0, 'diff_eval': 113474.30846605197} step=77000
2025-12-05 23:43.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_77000.d3


Epoch 78/200: 100%|██████████| 1000/1000 [00:17<00:00, 56.80it/s, critic_loss=1.12e+15, actor_loss=-1.48e+8]


2025-12-05 23:44.12 [info     ] DDPG_20251205231906: epoch=78 step=78000 epoch=78 metrics={'time_sample_batch': 0.0050444421768188474, 'time_algorithm_update': 0.01198848819732666, 'critic_loss': 1122086600734408.8, 'actor_loss': -148166135.712, 'time_step': 0.01728678870201111, 'td_error': 917480287863903.9, 'value_scale': 143270792.50293377, 'discounted_advantage': -190916802.08477852, 'initial_state': 184507312.0, 'diff_eval': 113474.30846605197} step=78000
2025-12-05 23:44.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_78000.d3


Epoch 79/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.76it/s, critic_loss=1.25e+15, actor_loss=-1.56e+8]


2025-12-05 23:44.31 [info     ] DDPG_20251205231906: epoch=79 step=79000 epoch=79 metrics={'time_sample_batch': 0.0046394155025482175, 'time_algorithm_update': 0.010746683359146119, 'critic_loss': 1249995126063759.2, 'actor_loss': -156400148.176, 'time_step': 0.01562918519973755, 'td_error': 1025670019758652.5, 'value_scale': 151493314.17770326, 'discounted_advantage': -201065089.22306773, 'initial_state': 195204544.0, 'diff_eval': 113474.30846605197} step=79000
2025-12-05 23:44.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_79000.d3


Epoch 80/200: 100%|██████████| 1000/1000 [00:18<00:00, 53.21it/s, critic_loss=1.37e+15, actor_loss=-1.65e+8]


2025-12-05 23:44.53 [info     ] DDPG_20251205231906: epoch=80 step=80000 epoch=80 metrics={'time_sample_batch': 0.004681661605834961, 'time_algorithm_update': 0.013567135095596313, 'critic_loss': 1374665960828436.5, 'actor_loss': -165411905.024, 'time_step': 0.018490460395812987, 'td_error': 1144073916485072.0, 'value_scale': 160080695.98491198, 'discounted_advantage': -210522320.21172422, 'initial_state': 206454080.0, 'diff_eval': 113474.30846605197} step=80000
2025-12-05 23:44.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_80000.d3


Epoch 81/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.32it/s, critic_loss=1.55e+15, actor_loss=-1.74e+8]


2025-12-05 23:45.13 [info     ] DDPG_20251205231906: epoch=81 step=81000 epoch=81 metrics={'time_sample_batch': 0.004676357507705688, 'time_algorithm_update': 0.011527259826660157, 'critic_loss': 1549426168178933.8, 'actor_loss': -174476576.816, 'time_step': 0.01653114938735962, 'td_error': 1272686924929935.2, 'value_scale': 168688600.71248952, 'discounted_advantage': -223279464.4288745, 'initial_state': 217689088.0, 'diff_eval': 113474.30846605197} step=81000
2025-12-05 23:45.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_81000.d3


Epoch 82/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.61it/s, critic_loss=1.72e+15, actor_loss=-1.84e+8]


2025-12-05 23:45.32 [info     ] DDPG_20251205231906: epoch=82 step=82000 epoch=82 metrics={'time_sample_batch': 0.0045490386486053466, 'time_algorithm_update': 0.01058396816253662, 'critic_loss': 1724082486780100.5, 'actor_loss': -183832732.736, 'time_step': 0.015394403219223023, 'td_error': 1412553391366618.5, 'value_scale': 177701161.5004191, 'discounted_advantage': -234613194.286346, 'initial_state': 229476048.0, 'diff_eval': 113474.30846605197} step=82000
2025-12-05 23:45.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_82000.d3


Epoch 83/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.90it/s, critic_loss=1.92e+15, actor_loss=-1.93e+8]


2025-12-05 23:45.51 [info     ] DDPG_20251205231906: epoch=83 step=83000 epoch=83 metrics={'time_sample_batch': 0.004880887508392334, 'time_algorithm_update': 0.010977516889572143, 'critic_loss': 1923650081299366.0, 'actor_loss': -193315332.128, 'time_step': 0.016101855039596557, 'td_error': 1564338651305741.5, 'value_scale': 186929403.31936294, 'discounted_advantage': -246955773.39669418, 'initial_state': 241500816.0, 'diff_eval': 113474.30846605197} step=83000
2025-12-05 23:45.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_83000.d3


Epoch 84/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.27it/s, critic_loss=2.09e+15, actor_loss=-2.03e+8]


2025-12-05 23:46.11 [info     ] DDPG_20251205231906: epoch=84 step=84000 epoch=84 metrics={'time_sample_batch': 0.004814257860183716, 'time_algorithm_update': 0.011518083333969116, 'critic_loss': 2090528199497744.5, 'actor_loss': -203485215.92, 'time_step': 0.016570557355880738, 'td_error': 1729174859208306.8, 'value_scale': 196620793.61106455, 'discounted_advantage': -257675754.28356832, 'initial_state': 254194528.0, 'diff_eval': 113474.30846605197} step=84000
2025-12-05 23:46.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_84000.d3


Epoch 85/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.59it/s, critic_loss=2.32e+15, actor_loss=-2.14e+8]


2025-12-05 23:46.30 [info     ] DDPG_20251205231906: epoch=85 step=85000 epoch=85 metrics={'time_sample_batch': 0.004705993175506592, 'time_algorithm_update': 0.010720739364624024, 'critic_loss': 2318785096041103.5, 'actor_loss': -213661759.504, 'time_step': 0.01567393732070923, 'td_error': 1905335202678588.2, 'value_scale': 206248696.7627829, 'discounted_advantage': -271771431.4462831, 'initial_state': 266802016.0, 'diff_eval': 113474.30846605197} step=85000
2025-12-05 23:46.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_85000.d3


Epoch 86/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.12it/s, critic_loss=2.56e+15, actor_loss=-2.24e+8]


2025-12-05 23:46.48 [info     ] DDPG_20251205231906: epoch=86 step=86000 epoch=86 metrics={'time_sample_batch': 0.004663333654403686, 'time_algorithm_update': 0.010636986017227172, 'critic_loss': 2560468228459462.5, 'actor_loss': -224254373.216, 'time_step': 0.015545469522476197, 'td_error': 2099289440227879.0, 'value_scale': 216501422.54484493, 'discounted_advantage': -284317898.98751754, 'initial_state': 280247200.0, 'diff_eval': 113474.30846605197} step=86000
2025-12-05 23:46.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_86000.d3


Epoch 87/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.62it/s, critic_loss=2.82e+15, actor_loss=-2.35e+8]


2025-12-05 23:47.07 [info     ] DDPG_20251205231906: epoch=87 step=87000 epoch=87 metrics={'time_sample_batch': 0.004662316799163818, 'time_algorithm_update': 0.010743755340576172, 'critic_loss': 2817695383050780.5, 'actor_loss': -235381676.208, 'time_step': 0.01566087341308594, 'td_error': 2311007389476413.0, 'value_scale': 227100961.12321877, 'discounted_advantage': -298006656.13022375, 'initial_state': 294124096.0, 'diff_eval': 113474.30846605197} step=87000
2025-12-05 23:47.07 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_87000.d3


Epoch 88/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.61it/s, critic_loss=3.13e+15, actor_loss=-2.47e+8]


2025-12-05 23:47.26 [info     ] DDPG_20251205231906: epoch=88 step=88000 epoch=88 metrics={'time_sample_batch': 0.004732972383499146, 'time_algorithm_update': 0.010961774587631225, 'critic_loss': 3128720614957777.0, 'actor_loss': -246613898.128, 'time_step': 0.01593332552909851, 'td_error': 2541897489925420.5, 'value_scale': 238034886.08549875, 'discounted_advantage': -313201244.95341736, 'initial_state': 308404192.0, 'diff_eval': 113474.30846605197} step=88000
2025-12-05 23:47.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_88000.d3


Epoch 89/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.84it/s, critic_loss=3.41e+15, actor_loss=-2.58e+8]


2025-12-05 23:47.45 [info     ] DDPG_20251205231906: epoch=89 step=89000 epoch=89 metrics={'time_sample_batch': 0.004648435831069947, 'time_algorithm_update': 0.010751209497451782, 'critic_loss': 3409684874020258.0, 'actor_loss': -258542217.696, 'time_step': 0.015635724067687987, 'td_error': 2790592018558867.0, 'value_scale': 249426623.26404023, 'discounted_advantage': -327258594.20170754, 'initial_state': 323381248.0, 'diff_eval': 113474.30846605197} step=89000
2025-12-05 23:47.45 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_89000.d3


Epoch 90/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.89it/s, critic_loss=3.76e+15, actor_loss=-2.71e+8]


2025-12-05 23:48.04 [info     ] DDPG_20251205231906: epoch=90 step=90000 epoch=90 metrics={'time_sample_batch': 0.004873304843902588, 'time_algorithm_update': 0.011015775680541993, 'critic_loss': 3767842638410023.0, 'actor_loss': -270689591.024, 'time_step': 0.016135757207870482, 'td_error': 3055656538573013.5, 'value_scale': 260906177.49371332, 'discounted_advantage': -342687602.6192513, 'initial_state': 338375584.0, 'diff_eval': 113474.30846605197} step=90000
2025-12-05 23:48.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_90000.d3


Epoch 91/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.66it/s, critic_loss=4.1e+15, actor_loss=-2.83e+8]


2025-12-05 23:48.24 [info     ] DDPG_20251205231906: epoch=91 step=91000 epoch=91 metrics={'time_sample_batch': 0.005029228210449219, 'time_algorithm_update': 0.011438011884689332, 'critic_loss': 4100080300938232.0, 'actor_loss': -283085020.8, 'time_step': 0.01672566795349121, 'td_error': 3343342227029843.5, 'value_scale': 272919693.95473593, 'discounted_advantage': -357705096.3724946, 'initial_state': 354131488.0, 'diff_eval': 113474.30846605197} step=91000
2025-12-05 23:48.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_91000.d3


Epoch 92/200: 100%|██████████| 1000/1000 [00:15<00:00, 64.59it/s, critic_loss=4.5e+15, actor_loss=-2.96e+8]


2025-12-05 23:48.42 [info     ] DDPG_20251205231906: epoch=92 step=92000 epoch=92 metrics={'time_sample_batch': 0.004481115341186523, 'time_algorithm_update': 0.01050368857383728, 'critic_loss': 4506988887532372.0, 'actor_loss': -295863079.488, 'time_step': 0.015208202600479125, 'td_error': 3650488092805076.0, 'value_scale': 285113324.85163456, 'discounted_advantage': -373569872.6284402, 'initial_state': 370122368.0, 'diff_eval': 113474.30846605197} step=92000
2025-12-05 23:48.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_92000.d3


Epoch 93/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.41it/s, critic_loss=4.93e+15, actor_loss=-3.09e+8]


2025-12-05 23:49.02 [info     ] DDPG_20251205231906: epoch=93 step=93000 epoch=93 metrics={'time_sample_batch': 0.00478238844871521, 'time_algorithm_update': 0.010957728147506713, 'critic_loss': 4942174651355759.0, 'actor_loss': -309218783.36, 'time_step': 0.015990338325500487, 'td_error': 3989136616140685.0, 'value_scale': 297873428.69237214, 'discounted_advantage': -391382600.51389337, 'initial_state': 386833696.0, 'diff_eval': 113474.30846605197} step=93000
2025-12-05 23:49.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_93000.d3


Epoch 94/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.88it/s, critic_loss=5.33e+15, actor_loss=-3.23e+8]


2025-12-05 23:49.21 [info     ] DDPG_20251205231906: epoch=94 step=94000 epoch=94 metrics={'time_sample_batch': 0.004899859428405762, 'time_algorithm_update': 0.01120801591873169, 'critic_loss': 5335824550158402.0, 'actor_loss': -323077664.736, 'time_step': 0.016361724615097047, 'td_error': 4351174300481871.5, 'value_scale': 311158850.5984912, 'discounted_advantage': -406737133.98569673, 'initial_state': 404243648.0, 'diff_eval': 113474.30846605197} step=94000
2025-12-05 23:49.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_94000.d3


Epoch 95/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.29it/s, critic_loss=5.86e+15, actor_loss=-3.37e+8]


2025-12-05 23:49.40 [info     ] DDPG_20251205231906: epoch=95 step=95000 epoch=95 metrics={'time_sample_batch': 0.00481280517578125, 'time_algorithm_update': 0.010926353454589843, 'critic_loss': 5861324230762693.0, 'actor_loss': -336959722.208, 'time_step': 0.01599920892715454, 'td_error': 4736519521418558.0, 'value_scale': 324450573.8105616, 'discounted_advantage': -426118904.2770675, 'initial_state': 421667680.0, 'diff_eval': 113474.30846605197} step=95000
2025-12-05 23:49.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_95000.d3


Epoch 96/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.31it/s, critic_loss=6.4e+15, actor_loss=-3.51e+8]


2025-12-05 23:50.00 [info     ] DDPG_20251205231906: epoch=96 step=96000 epoch=96 metrics={'time_sample_batch': 0.004778510808944702, 'time_algorithm_update': 0.010948879718780518, 'critic_loss': 6398119758570455.0, 'actor_loss': -351380781.696, 'time_step': 0.015987207412719726, 'td_error': 5156127270169103.0, 'value_scale': 338448344.9723386, 'discounted_advantage': -444375596.8642702, 'initial_state': 440096064.0, 'diff_eval': 113474.30846605197} step=96000
2025-12-05 23:50.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_96000.d3


Epoch 97/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.73it/s, critic_loss=6.95e+15, actor_loss=-3.66e+8]


2025-12-05 23:50.19 [info     ] DDPG_20251205231906: epoch=97 step=97000 epoch=97 metrics={'time_sample_batch': 0.004750016927719116, 'time_algorithm_update': 0.010874899625778199, 'critic_loss': 6950932637301080.0, 'actor_loss': -366435316.864, 'time_step': 0.01588150954246521, 'td_error': 5603942350843280.0, 'value_scale': 352753068.8952221, 'discounted_advantage': -463241646.835673, 'initial_state': 458913952.0, 'diff_eval': 113474.30846605197} step=97000
2025-12-05 23:50.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_97000.d3


Epoch 98/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.87it/s, critic_loss=7.55e+15, actor_loss=-3.82e+8]


2025-12-05 23:50.37 [info     ] DDPG_20251205231906: epoch=98 step=98000 epoch=98 metrics={'time_sample_batch': 0.004503224849700928, 'time_algorithm_update': 0.010612574815750122, 'critic_loss': 7549861502769955.0, 'actor_loss': -381942583.008, 'time_step': 0.015355496406555176, 'td_error': 6084681563812596.0, 'value_scale': 367440281.9949707, 'discounted_advantage': -483303389.3143947, 'initial_state': 478223104.0, 'diff_eval': 113474.30846605197} step=98000
2025-12-05 23:50.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_98000.d3


Epoch 99/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.82it/s, critic_loss=8.16e+15, actor_loss=-3.97e+8]


2025-12-05 23:50.56 [info     ] DDPG_20251205231906: epoch=99 step=99000 epoch=99 metrics={'time_sample_batch': 0.0047541985511779785, 'time_algorithm_update': 0.010880101919174195, 'critic_loss': 8166166359856644.0, 'actor_loss': -397515068.032, 'time_step': 0.01587505602836609, 'td_error': 6603125576361972.0, 'value_scale': 382888420.34870076, 'discounted_advantage': -500833890.9769948, 'initial_state': 498529568.0, 'diff_eval': 113474.30846605197} step=99000
2025-12-05 23:50.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_99000.d3


Epoch 100/200: 100%|██████████| 1000/1000 [00:16<00:00, 62.34it/s, critic_loss=8.87e+15, actor_loss=-4.14e+8]


2025-12-05 23:51.15 [info     ] DDPG_20251205231906: epoch=100 step=100000 epoch=100 metrics={'time_sample_batch': 0.004655977010726928, 'time_algorithm_update': 0.010845561027526856, 'critic_loss': 8878242484795212.0, 'actor_loss': -414046269.76, 'time_step': 0.01574453616142273, 'td_error': 7159062326135519.0, 'value_scale': 398605865.1835708, 'discounted_advantage': -520996203.52466166, 'initial_state': 519201600.0, 'diff_eval': 113474.30846605197} step=100000
2025-12-05 23:51.15 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_100000.d3


Epoch 101/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.92it/s, critic_loss=9.66e+15, actor_loss=-4.31e+8]


2025-12-05 23:51.34 [info     ] DDPG_20251205231906: epoch=101 step=101000 epoch=101 metrics={'time_sample_batch': 0.004671033143997192, 'time_algorithm_update': 0.010699032545089722, 'critic_loss': 9670893308066398.0, 'actor_loss': -430864027.68, 'time_step': 0.01560419750213623, 'td_error': 7754599645635978.0, 'value_scale': 414624370.0922045, 'discounted_advantage': -544021591.6117027, 'initial_state': 540285120.0, 'diff_eval': 113474.30846605197} step=101000
2025-12-05 23:51.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_101000.d3


Epoch 102/200: 100%|██████████| 1000/1000 [00:17<00:00, 56.91it/s, critic_loss=1.04e+16, actor_loss=-4.48e+8]


2025-12-05 23:51.54 [info     ] DDPG_20251205231906: epoch=102 step=102000 epoch=102 metrics={'time_sample_batch': 0.0052184991836547855, 'time_algorithm_update': 0.011737730979919434, 'critic_loss': 1.0386596293442012e+16, 'actor_loss': -448437355.008, 'time_step': 0.0172429940700531, 'td_error': 8391124065372011.0, 'value_scale': 431261012.5766974, 'discounted_advantage': -565339644.5764489, 'initial_state': 562126720.0, 'diff_eval': 113474.30846605197} step=102000
2025-12-05 23:51.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_102000.d3


Epoch 103/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.71it/s, critic_loss=1.13e+16, actor_loss=-4.66e+8]


2025-12-05 23:52.14 [info     ] DDPG_20251205231906: epoch=103 step=103000 epoch=103 metrics={'time_sample_batch': 0.0050896165370941165, 'time_algorithm_update': 0.011380135774612428, 'critic_loss': 1.1276682302945492e+16, 'actor_loss': -466107906.048, 'time_step': 0.016719900369644165, 'td_error': 9073088683351164.0, 'value_scale': 448457779.31601006, 'discounted_advantage': -585971781.3321798, 'initial_state': 584796992.0, 'diff_eval': 113474.30846605197} step=103000
2025-12-05 23:52.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_103000.d3


Epoch 104/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.43it/s, critic_loss=1.21e+16, actor_loss=-4.84e+8]


2025-12-05 23:52.33 [info     ] DDPG_20251205231906: epoch=104 step=104000 epoch=104 metrics={'time_sample_batch': 0.004603229999542237, 'time_algorithm_update': 0.010652239561080933, 'critic_loss': 1.2140292627576652e+16, 'actor_loss': -484273658.208, 'time_step': 0.015486194849014282, 'td_error': 9790827275595172.0, 'value_scale': 465682828.6974015, 'discounted_advantage': -609854446.5097646, 'initial_state': 607426752.0, 'diff_eval': 113474.30846605197} step=104000
2025-12-05 23:52.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_104000.d3


Epoch 105/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.63it/s, critic_loss=1.32e+16, actor_loss=-5.03e+8]


2025-12-05 23:52.52 [info     ] DDPG_20251205231906: epoch=105 step=105000 epoch=105 metrics={'time_sample_batch': 0.004649523258209228, 'time_algorithm_update': 0.010802572011947632, 'critic_loss': 1.3156185015765172e+16, 'actor_loss': -502991401.792, 'time_step': 0.01568744683265686, 'td_error': 1.0561584826638116e+16, 'value_scale': 483493054.71248955, 'discounted_advantage': -634111676.0605125, 'initial_state': 630884672.0, 'diff_eval': 113474.30846605197} step=105000
2025-12-05 23:52.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_105000.d3


Epoch 106/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.73it/s, critic_loss=1.41e+16, actor_loss=-5.22e+8]


2025-12-05 23:53.12 [info     ] DDPG_20251205231906: epoch=106 step=106000 epoch=106 metrics={'time_sample_batch': 0.005217545986175537, 'time_algorithm_update': 0.011539024114608765, 'critic_loss': 1.4083104527897068e+16, 'actor_loss': -522389166.528, 'time_step': 0.01700990843772888, 'td_error': 1.140128834257571e+16, 'value_scale': 502451841.6596815, 'discounted_advantage': -655591232.4212277, 'initial_state': 655786176.0, 'diff_eval': 113474.30846605197} step=106000
2025-12-05 23:53.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_106000.d3


Epoch 107/200: 100%|██████████| 1000/1000 [00:17<00:00, 56.10it/s, critic_loss=1.52e+16, actor_loss=-5.43e+8]


2025-12-05 23:53.33 [info     ] DDPG_20251205231906: epoch=107 step=107000 epoch=107 metrics={'time_sample_batch': 0.005480135440826416, 'time_algorithm_update': 0.011768914699554444, 'critic_loss': 1.5163948995149038e+16, 'actor_loss': -542647130.496, 'time_step': 0.0175039963722229, 'td_error': 1.2293714269838514e+16, 'value_scale': 521729933.33445096, 'discounted_advantage': -679532550.2406552, 'initial_state': 681117312.0, 'diff_eval': 113474.30846605197} step=107000
2025-12-05 23:53.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_107000.d3


Epoch 108/200: 100%|██████████| 1000/1000 [00:18<00:00, 54.92it/s, critic_loss=1.64e+16, actor_loss=-5.63e+8]


2025-12-05 23:53.54 [info     ] DDPG_20251205231906: epoch=108 step=108000 epoch=108 metrics={'time_sample_batch': 0.004692916870117188, 'time_algorithm_update': 0.011951771020889282, 'critic_loss': 1.6369646180105192e+16, 'actor_loss': -562971785.6, 'time_step': 0.017926813364028932, 'td_error': 1.3235032928484332e+16, 'value_scale': 541149331.5708299, 'discounted_advantage': -706372993.8258344, 'initial_state': 706673856.0, 'diff_eval': 113474.30846605197} step=108000
2025-12-05 23:53.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_108000.d3


Epoch 109/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.59it/s, critic_loss=1.77e+16, actor_loss=-5.83e+8]


2025-12-05 23:54.14 [info     ] DDPG_20251205231906: epoch=109 step=109000 epoch=109 metrics={'time_sample_batch': 0.004460423946380615, 'time_algorithm_update': 0.011756226539611817, 'critic_loss': 1.774773851565274e+16, 'actor_loss': -583566074.048, 'time_step': 0.016478242874145507, 'td_error': 1.4237801633222792e+16, 'value_scale': 561064232.1441743, 'discounted_advantage': -734079707.7741812, 'initial_state': 732949056.0, 'diff_eval': 113474.30846605197} step=109000
2025-12-05 23:54.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_109000.d3


Epoch 110/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.87it/s, critic_loss=1.91e+16, actor_loss=-6.05e+8]


2025-12-05 23:54.33 [info     ] DDPG_20251205231906: epoch=110 step=110000 epoch=110 metrics={'time_sample_batch': 0.004760822057723999, 'time_algorithm_update': 0.010863078117370606, 'critic_loss': 1.913381079306207e+16, 'actor_loss': -605254852.672, 'time_step': 0.015854596376419068, 'td_error': 1.5310046279006876e+16, 'value_scale': 581666539.906119, 'discounted_advantage': -761266523.0701195, 'initial_state': 760056704.0, 'diff_eval': 113474.30846605197} step=110000
2025-12-05 23:54.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_110000.d3


Epoch 111/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.76it/s, critic_loss=2.03e+16, actor_loss=-6.28e+8]


2025-12-05 23:54.52 [info     ] DDPG_20251205231906: epoch=111 step=111000 epoch=111 metrics={'time_sample_batch': 0.004527251720428466, 'time_algorithm_update': 0.010668935060501098, 'critic_loss': 2.0349229096937456e+16, 'actor_loss': -627874605.312, 'time_step': 0.015413742303848267, 'td_error': 1.644176283085224e+16, 'value_scale': 602866276.0335289, 'discounted_advantage': -786081764.0484174, 'initial_state': 788043200.0, 'diff_eval': 113474.30846605197} step=111000
2025-12-05 23:54.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_111000.d3


Epoch 112/200: 100%|██████████| 1000/1000 [00:17<00:00, 56.11it/s, critic_loss=2.19e+16, actor_loss=-6.5e+8]


2025-12-05 23:55.12 [info     ] DDPG_20251205231906: epoch=112 step=112000 epoch=112 metrics={'time_sample_batch': 0.005214120388031006, 'time_algorithm_update': 0.01209964895248413, 'critic_loss': 2.193027656656198e+16, 'actor_loss': -649964421.056, 'time_step': 0.01754764151573181, 'td_error': 1.76212453551041e+16, 'value_scale': 623915897.7870914, 'discounted_advantage': -814987555.1938155, 'initial_state': 815848512.0, 'diff_eval': 113474.30846605197} step=112000
2025-12-05 23:55.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_112000.d3


Epoch 113/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.46it/s, critic_loss=2.36e+16, actor_loss=-6.72e+8]


2025-12-05 23:55.32 [info     ] DDPG_20251205231906: epoch=113 step=113000 epoch=113 metrics={'time_sample_batch': 0.004829711675643921, 'time_algorithm_update': 0.011077922105789185, 'critic_loss': 2.3613766437563268e+16, 'actor_loss': -672508645.056, 'time_step': 0.016193540573120117, 'td_error': 1.8868840718278156e+16, 'value_scale': 645500026.9036044, 'discounted_advantage': -843405158.3678695, 'initial_state': 844379648.0, 'diff_eval': 113474.30846605197} step=113000
2025-12-05 23:55.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_113000.d3


Epoch 114/200: 100%|██████████| 1000/1000 [00:18<00:00, 53.70it/s, critic_loss=2.5e+16, actor_loss=-6.95e+8]


2025-12-05 23:55.53 [info     ] DDPG_20251205231906: epoch=114 step=114000 epoch=114 metrics={'time_sample_batch': 0.00509744381904602, 'time_algorithm_update': 0.01297413158416748, 'critic_loss': 2.502045823448541e+16, 'actor_loss': -695515369.728, 'time_step': 0.01831237840652466, 'td_error': 2.0201009302082116e+16, 'value_scale': 667901562.1793797, 'discounted_advantage': -870945328.1887548, 'initial_state': 874000192.0, 'diff_eval': 113474.30846605197} step=114000
2025-12-05 23:55.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_114000.d3


Epoch 115/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.66it/s, critic_loss=2.71e+16, actor_loss=-7.19e+8]


2025-12-05 23:56.13 [info     ] DDPG_20251205231906: epoch=115 step=115000 epoch=115 metrics={'time_sample_batch': 0.005359568357467651, 'time_algorithm_update': 0.010890003204345702, 'critic_loss': 2.713100959292483e+16, 'actor_loss': -719306158.528, 'time_step': 0.016479670763015746, 'td_error': 2.1602733621138056e+16, 'value_scale': 690350474.2866722, 'discounted_advantage': -903716916.6655645, 'initial_state': 903620352.0, 'diff_eval': 113474.30846605197} step=115000
2025-12-05 23:56.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_115000.d3


Epoch 116/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.41it/s, critic_loss=2.88e+16, actor_loss=-7.44e+8]


2025-12-05 23:56.32 [info     ] DDPG_20251205231906: epoch=116 step=116000 epoch=116 metrics={'time_sample_batch': 0.004841120719909668, 'time_algorithm_update': 0.011158771991729736, 'critic_loss': 2.882805348413787e+16, 'actor_loss': -743872808.0, 'time_step': 0.016253062963485717, 'td_error': 2.3124581910982184e+16, 'value_scale': 714272621.3445096, 'discounted_advantage': -932223705.117762, 'initial_state': 935155136.0, 'diff_eval': 113474.30846605197} step=116000
2025-12-05 23:56.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_116000.d3


Epoch 117/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.29it/s, critic_loss=3.06e+16, actor_loss=-7.69e+8]


2025-12-05 23:56.53 [info     ] DDPG_20251205231906: epoch=117 step=117000 epoch=117 metrics={'time_sample_batch': 0.005268120765686035, 'time_algorithm_update': 0.011547898769378662, 'critic_loss': 3.0635512339835652e+16, 'actor_loss': -769545673.152, 'time_step': 0.01710843062400818, 'td_error': 2.4731720748648784e+16, 'value_scale': 738737745.5892707, 'discounted_advantage': -961492915.1238016, 'initial_state': 967482432.0, 'diff_eval': 113474.30846605197} step=117000
2025-12-05 23:56.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_117000.d3


Epoch 118/200: 100%|██████████| 1000/1000 [00:17<00:00, 56.22it/s, critic_loss=3.3e+16, actor_loss=-7.95e+8]


2025-12-05 23:57.13 [info     ] DDPG_20251205231906: epoch=118 step=118000 epoch=118 metrics={'time_sample_batch': 0.005763905763626099, 'time_algorithm_update': 0.011395421981811524, 'critic_loss': 3.3018993453150764e+16, 'actor_loss': -795418556.8, 'time_step': 0.01743866229057312, 'td_error': 2.641253534437009e+16, 'value_scale': 763246654.6119027, 'discounted_advantage': -994166802.9018829, 'initial_state': 999830144.0, 'diff_eval': 113474.30846605197} step=118000
2025-12-05 23:57.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_118000.d3


Epoch 119/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.37it/s, critic_loss=3.56e+16, actor_loss=-8.2e+8]


2025-12-05 23:57.32 [info     ] DDPG_20251205231906: epoch=119 step=119000 epoch=119 metrics={'time_sample_batch': 0.004603493928909302, 'time_algorithm_update': 0.01064213466644287, 'critic_loss': 3.5607862542891744e+16, 'actor_loss': -820331957.632, 'time_step': 0.015497986078262329, 'td_error': 2.8193914336810204e+16, 'value_scale': 788178445.5322716, 'discounted_advantage': -1029958077.9813135, 'initial_state': 1032695616.0, 'diff_eval': 113474.30846605197} step=119000
2025-12-05 23:57.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_119000.d3


Epoch 120/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.58it/s, critic_loss=3.74e+16, actor_loss=-8.48e+8]


2025-12-05 23:57.52 [info     ] DDPG_20251205231906: epoch=120 step=120000 epoch=120 metrics={'time_sample_batch': 0.004992575168609619, 'time_algorithm_update': 0.011791583299636841, 'critic_loss': 3.7392751153249256e+16, 'actor_loss': -847992788.736, 'time_step': 0.017050427198410035, 'td_error': 3.0090891258210372e+16, 'value_scale': 814608753.5691534, 'discounted_advantage': -1056999613.4923959, 'initial_state': 1067644160.0, 'diff_eval': 113474.30846605197} step=120000
2025-12-05 23:57.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_120000.d3


Epoch 121/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.96it/s, critic_loss=4.04e+16, actor_loss=-8.75e+8]


2025-12-05 23:58.11 [info     ] DDPG_20251205231906: epoch=121 step=121000 epoch=121 metrics={'time_sample_batch': 0.0047253105640411375, 'time_algorithm_update': 0.010877506732940674, 'critic_loss': 4.037329019059202e+16, 'actor_loss': -875625113.344, 'time_step': 0.015854708909988403, 'td_error': 3.2032543295891696e+16, 'value_scale': 839825802.0653814, 'discounted_advantage': -1097785935.9258544, 'initial_state': 1100884992.0, 'diff_eval': 113474.30846605197} step=121000
2025-12-05 23:58.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_121000.d3


Epoch 122/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.24it/s, critic_loss=4.26e+16, actor_loss=-9.03e+8]


2025-12-05 23:58.31 [info     ] DDPG_20251205231906: epoch=122 step=122000 epoch=122 metrics={'time_sample_batch': 0.004881514549255371, 'time_algorithm_update': 0.011415592432022094, 'critic_loss': 4.258162563961638e+16, 'actor_loss': -903409046.4, 'time_step': 0.016557363986968995, 'td_error': 3.4156518378636212e+16, 'value_scale': 867423381.6395642, 'discounted_advantage': -1128589259.9689276, 'initial_state': 1137411072.0, 'diff_eval': 113474.30846605197} step=122000
2025-12-05 23:58.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_122000.d3


Epoch 123/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.81it/s, critic_loss=4.54e+16, actor_loss=-9.33e+8]


2025-12-05 23:58.50 [info     ] DDPG_20251205231906: epoch=123 step=123000 epoch=123 metrics={'time_sample_batch': 0.004661754131317138, 'time_algorithm_update': 0.010926224231719971, 'critic_loss': 4.538859873707334e+16, 'actor_loss': -933080958.528, 'time_step': 0.01585199761390686, 'td_error': 3.633711698789334e+16, 'value_scale': 894499628.6135792, 'discounted_advantage': -1164308587.0516217, 'initial_state': 1173042304.0, 'diff_eval': 113474.30846605197} step=123000
2025-12-05 23:58.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_123000.d3


Epoch 124/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.79it/s, critic_loss=4.84e+16, actor_loss=-9.61e+8]


2025-12-05 23:59.10 [info     ] DDPG_20251205231906: epoch=124 step=124000 epoch=124 metrics={'time_sample_batch': 0.005123378992080688, 'time_algorithm_update': 0.011562613010406494, 'critic_loss': 4.83913533703481e+16, 'actor_loss': -961507412.928, 'time_step': 0.016963467836380006, 'td_error': 3.867302979178983e+16, 'value_scale': 922466824.5699916, 'discounted_advantage': -1202871387.616432, 'initial_state': 1209926016.0, 'diff_eval': 113474.30846605197} step=124000
2025-12-05 23:59.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_124000.d3


Epoch 125/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.00it/s, critic_loss=5.17e+16, actor_loss=-9.91e+8]


2025-12-05 23:59.30 [info     ] DDPG_20251205231906: epoch=125 step=125000 epoch=125 metrics={'time_sample_batch': 0.004852505445480347, 'time_algorithm_update': 0.011222758531570435, 'critic_loss': 5.1704324532111144e+16, 'actor_loss': -990968056.0, 'time_step': 0.016334364652633666, 'td_error': 4.114002938001083e+16, 'value_scale': 951257952.0670578, 'discounted_advantage': -1240147272.1006098, 'initial_state': 1247987840.0, 'diff_eval': 113474.30846605197} step=125000
2025-12-05 23:59.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_125000.d3


Epoch 126/200: 100%|██████████| 1000/1000 [00:16<00:00, 62.45it/s, critic_loss=5.47e+16, actor_loss=-1.02e+9]


2025-12-05 23:59.49 [info     ] DDPG_20251205231906: epoch=126 step=126000 epoch=126 metrics={'time_sample_batch': 0.004705678224563598, 'time_algorithm_update': 0.010765305042266846, 'critic_loss': 5.472964721062917e+16, 'actor_loss': -1022594001.6, 'time_step': 0.01570378828048706, 'td_error': 4.373625439674454e+16, 'value_scale': 980693231.4031852, 'discounted_advantage': -1278233895.084501, 'initial_state': 1286875008.0, 'diff_eval': 113474.30846605197} step=126000
2025-12-05 23:59.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_126000.d3


Epoch 127/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.48it/s, critic_loss=5.83e+16, actor_loss=-1.05e+9]


2025-12-06 00:00.08 [info     ] DDPG_20251205231906: epoch=127 step=127000 epoch=127 metrics={'time_sample_batch': 0.004863886833190918, 'time_algorithm_update': 0.011111115455627442, 'critic_loss': 5.83087526020949e+16, 'actor_loss': -1054035728.064, 'time_step': 0.016218948125839232, 'td_error': 4.647877827132023e+16, 'value_scale': 1010794270.9270746, 'discounted_advantage': -1317319052.8932333, 'initial_state': 1326542208.0, 'diff_eval': 113474.30846605197} step=127000
2025-12-06 00:00.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_127000.d3


Epoch 128/200: 100%|██████████| 1000/1000 [00:17<00:00, 55.75it/s, critic_loss=6.17e+16, actor_loss=-1.09e+9]


2025-12-06 00:00.29 [info     ] DDPG_20251205231906: epoch=128 step=128000 epoch=128 metrics={'time_sample_batch': 0.005297810554504395, 'time_algorithm_update': 0.012009242057800292, 'critic_loss': 6.176954140916397e+16, 'actor_loss': -1085499010.304, 'time_step': 0.017588029146194457, 'td_error': 4.930471616351691e+16, 'value_scale': 1041064265.5020956, 'discounted_advantage': -1354111683.715912, 'initial_state': 1366662528.0, 'diff_eval': 113474.30846605197} step=128000
2025-12-06 00:00.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_128000.d3


Epoch 129/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.07it/s, critic_loss=6.54e+16, actor_loss=-1.12e+9]


2025-12-06 00:00.49 [info     ] DDPG_20251205231906: epoch=129 step=129000 epoch=129 metrics={'time_sample_batch': 0.0047977614402771, 'time_algorithm_update': 0.01159420657157898, 'critic_loss': 6.550444597049295e+16, 'actor_loss': -1117882951.552, 'time_step': 0.016620885133743286, 'td_error': 5.231353979120231e+16, 'value_scale': 1072344061.4249791, 'discounted_advantage': -1392411396.341774, 'initial_state': 1408193280.0, 'diff_eval': 113474.30846605197} step=129000
2025-12-06 00:00.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_129000.d3


Epoch 130/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.38it/s, critic_loss=6.92e+16, actor_loss=-1.15e+9]


2025-12-06 00:01.08 [info     ] DDPG_20251205231906: epoch=130 step=130000 epoch=130 metrics={'time_sample_batch': 0.0050200016498565675, 'time_algorithm_update': 0.011209206104278564, 'critic_loss': 6.923368385290332e+16, 'actor_loss': -1151609278.848, 'time_step': 0.0164997763633728, 'td_error': 5.542445089460085e+16, 'value_scale': 1103568454.9002514, 'discounted_advantage': -1433775379.8001547, 'initial_state': 1449605888.0, 'diff_eval': 113474.30846605197} step=130000
2025-12-06 00:01.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_130000.d3


Epoch 131/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.18it/s, critic_loss=7.34e+16, actor_loss=-1.18e+9]


2025-12-06 00:01.28 [info     ] DDPG_20251205231906: epoch=131 step=131000 epoch=131 metrics={'time_sample_batch': 0.005102615356445312, 'time_algorithm_update': 0.01147573184967041, 'critic_loss': 7.346894526877584e+16, 'actor_loss': -1184627769.6, 'time_step': 0.016846171617507934, 'td_error': 5.868968132789277e+16, 'value_scale': 1135357821.981559, 'discounted_advantage': -1476303793.653381, 'initial_state': 1491757568.0, 'diff_eval': 113474.30846605197} step=131000
2025-12-06 00:01.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_131000.d3


Epoch 132/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.04it/s, critic_loss=7.7e+16, actor_loss=-1.22e+9]


2025-12-06 00:01.48 [info     ] DDPG_20251205231906: epoch=132 step=132000 epoch=132 metrics={'time_sample_batch': 0.004847342014312744, 'time_algorithm_update': 0.010963213920593263, 'critic_loss': 7.696353225153394e+16, 'actor_loss': -1218984558.336, 'time_step': 0.01606630539894104, 'td_error': 6.2169694576158296e+16, 'value_scale': 1168748499.533948, 'discounted_advantage': -1513136638.2116158, 'initial_state': 1535987584.0, 'diff_eval': 113474.30846605197} step=132000
2025-12-06 00:01.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_132000.d3


Epoch 133/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.96it/s, critic_loss=8.21e+16, actor_loss=-1.25e+9]


2025-12-06 00:02.07 [info     ] DDPG_20251205231906: epoch=133 step=133000 epoch=133 metrics={'time_sample_batch': 0.00486956787109375, 'time_algorithm_update': 0.01095405340194702, 'critic_loss': 8.216327394790526e+16, 'actor_loss': -1253940367.232, 'time_step': 0.01608973717689514, 'td_error': 6.572290849431983e+16, 'value_scale': 1201389321.2740989, 'discounted_advantage': -1557897537.1154625, 'initial_state': 1579033728.0, 'diff_eval': 113474.30846605197} step=133000
2025-12-06 00:02.07 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_133000.d3


Epoch 134/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.33it/s, critic_loss=8.74e+16, actor_loss=-1.29e+9]


2025-12-06 00:02.27 [info     ] DDPG_20251205231906: epoch=134 step=134000 epoch=134 metrics={'time_sample_batch': 0.005142764091491699, 'time_algorithm_update': 0.011389537811279297, 'critic_loss': 8.745665827077043e+16, 'actor_loss': -1288119936.384, 'time_step': 0.016796690940856934, 'td_error': 6.947883588092128e+16, 'value_scale': 1234940881.522213, 'discounted_advantage': -1603208868.5937872, 'initial_state': 1623351168.0, 'diff_eval': 113474.30846605197} step=134000
2025-12-06 00:02.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_134000.d3


Epoch 135/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.66it/s, critic_loss=9.17e+16, actor_loss=-1.32e+9]


2025-12-06 00:02.47 [info     ] DDPG_20251205231906: epoch=135 step=135000 epoch=135 metrics={'time_sample_batch': 0.005082449197769165, 'time_algorithm_update': 0.011088068962097169, 'critic_loss': 9.17756053527315e+16, 'actor_loss': -1324527388.544, 'time_step': 0.016433940649032593, 'td_error': 7.34228319710788e+16, 'value_scale': 1269495733.8675609, 'discounted_advantage': -1645273911.9534044, 'initial_state': 1669120768.0, 'diff_eval': 113474.30846605197} step=135000
2025-12-06 00:02.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_135000.d3


Epoch 136/200: 100%|██████████| 1000/1000 [00:18<00:00, 55.46it/s, critic_loss=9.83e+16, actor_loss=-1.36e+9]


2025-12-06 00:03.08 [info     ] DDPG_20251205231906: epoch=136 step=136000 epoch=136 metrics={'time_sample_batch': 0.00543200159072876, 'time_algorithm_update': 0.011971320390701295, 'critic_loss': 9.827108280478152e+16, 'actor_loss': -1360112144.128, 'time_step': 0.017672647953033447, 'td_error': 7.74837077658495e+16, 'value_scale': 1303420116.83487, 'discounted_advantage': -1698544042.6456997, 'initial_state': 1714015488.0, 'diff_eval': 113474.30846605197} step=136000
2025-12-06 00:03.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_136000.d3


Epoch 137/200: 100%|██████████| 1000/1000 [00:18<00:00, 54.83it/s, critic_loss=1.03e+17, actor_loss=-1.4e+9]


2025-12-06 00:03.29 [info     ] DDPG_20251205231906: epoch=137 step=137000 epoch=137 metrics={'time_sample_batch': 0.005491190433502198, 'time_algorithm_update': 0.0120859375, 'critic_loss': 1.0274779347586278e+17, 'actor_loss': -1398550637.952, 'time_step': 0.01786596083641052, 'td_error': 8.185606458724398e+16, 'value_scale': 1339865652.036882, 'discounted_advantage': -1740127712.7557244, 'initial_state': 1762436096.0, 'diff_eval': 113474.30846605197} step=137000
2025-12-06 00:03.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_137000.d3


Epoch 138/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.98it/s, critic_loss=1.08e+17, actor_loss=-1.44e+9]


2025-12-06 00:03.48 [info     ] DDPG_20251205231906: epoch=138 step=138000 epoch=138 metrics={'time_sample_batch': 0.004874041080474853, 'time_algorithm_update': 0.010904711961746217, 'critic_loss': 1.0822181322257072e+17, 'actor_loss': -1435852726.144, 'time_step': 0.016052911758422852, 'td_error': 8.646045720793549e+16, 'value_scale': 1376903094.7795475, 'discounted_advantage': -1786384851.8984969, 'initial_state': 1811527552.0, 'diff_eval': 113474.30846605197} step=138000
2025-12-06 00:03.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_138000.d3


Epoch 139/200: 100%|██████████| 1000/1000 [00:19<00:00, 52.08it/s, critic_loss=1.14e+17, actor_loss=-1.48e+9]


2025-12-06 00:04.10 [info     ] DDPG_20251205231906: epoch=139 step=139000 epoch=139 metrics={'time_sample_batch': 0.00660443115234375, 'time_algorithm_update': 0.012004651308059692, 'critic_loss': 1.1432172961120282e+17, 'actor_loss': -1476019737.088, 'time_step': 0.01886428213119507, 'td_error': 9.125411628782518e+16, 'value_scale': 1414301089.6227996, 'discounted_advantage': -1835122555.2132282, 'initial_state': 1860991232.0, 'diff_eval': 113474.30846605197} step=139000
2025-12-06 00:04.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_139000.d3


Epoch 140/200: 100%|██████████| 1000/1000 [00:18<00:00, 52.81it/s, critic_loss=1.21e+17, actor_loss=-1.52e+9]


2025-12-06 00:04.33 [info     ] DDPG_20251205231906: epoch=140 step=140000 epoch=140 metrics={'time_sample_batch': 0.0053363823890686035, 'time_algorithm_update': 0.01275043511390686, 'critic_loss': 1.2058598147732851e+17, 'actor_loss': -1515575451.008, 'time_step': 0.018359336853027344, 'td_error': 9.630320671386778e+16, 'value_scale': 1452794286.5582564, 'discounted_advantage': -1883491133.5069084, 'initial_state': 1911855872.0, 'diff_eval': 113474.30846605197} step=140000
2025-12-06 00:04.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_140000.d3


Epoch 141/200: 100%|██████████| 1000/1000 [00:18<00:00, 54.63it/s, critic_loss=1.26e+17, actor_loss=-1.56e+9]


2025-12-06 00:04.54 [info     ] DDPG_20251205231906: epoch=141 step=141000 epoch=141 metrics={'time_sample_batch': 0.005209556341171265, 'time_algorithm_update': 0.01247016453742981, 'critic_loss': 1.2610659491045914e+17, 'actor_loss': -1557639001.472, 'time_step': 0.01795260167121887, 'td_error': 1.0162072996112552e+17, 'value_scale': 1492621116.3788767, 'discounted_advantage': -1927956783.2945707, 'initial_state': 1964547456.0, 'diff_eval': 113474.30846605197} step=141000
2025-12-06 00:04.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_141000.d3


Epoch 142/200: 100%|██████████| 1000/1000 [00:19<00:00, 51.34it/s, critic_loss=1.35e+17, actor_loss=-1.6e+9]


2025-12-06 00:05.16 [info     ] DDPG_20251205231906: epoch=142 step=142000 epoch=142 metrics={'time_sample_batch': 0.005288022994995117, 'time_algorithm_update': 0.013571412086486816, 'critic_loss': 1.3484419259121952e+17, 'actor_loss': -1598451309.952, 'time_step': 0.019130305767059326, 'td_error': 1.0702697049882371e+17, 'value_scale': 1531055966.6722548, 'discounted_advantage': -1986869440.4354954, 'initial_state': 2015419776.0, 'diff_eval': 113474.30846605197} step=142000
2025-12-06 00:05.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_142000.d3


Epoch 143/200: 100%|██████████| 1000/1000 [00:17<00:00, 55.65it/s, critic_loss=1.43e+17, actor_loss=-1.64e+9]


2025-12-06 00:05.37 [info     ] DDPG_20251205231906: epoch=143 step=143000 epoch=143 metrics={'time_sample_batch': 0.005241283655166626, 'time_algorithm_update': 0.012102997779846192, 'critic_loss': 1.4272589246199811e+17, 'actor_loss': -1639403338.624, 'time_step': 0.017612407445907592, 'td_error': 1.127267893527914e+17, 'value_scale': 1570606054.236379, 'discounted_advantage': -2045979138.8006446, 'initial_state': 2067741696.0, 'diff_eval': 113474.30846605197} step=143000
2025-12-06 00:05.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_143000.d3


Epoch 144/200: 100%|██████████| 1000/1000 [00:18<00:00, 55.25it/s, critic_loss=1.48e+17, actor_loss=-1.68e+9]


2025-12-06 00:05.58 [info     ] DDPG_20251205231906: epoch=144 step=144000 epoch=144 metrics={'time_sample_batch': 0.005398587942123413, 'time_algorithm_update': 0.01200260853767395, 'critic_loss': 1.4846335718329104e+17, 'actor_loss': -1682316384.0, 'time_step': 0.017747294425964354, 'td_error': 1.1877042174844358e+17, 'value_scale': 1612674969.2942162, 'discounted_advantage': -2088571372.788275, 'initial_state': 2123545344.0, 'diff_eval': 113474.30846605197} step=144000
2025-12-06 00:05.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_144000.d3


Epoch 145/200: 100%|██████████| 1000/1000 [00:16<00:00, 58.88it/s, critic_loss=1.57e+17, actor_loss=-1.73e+9]


2025-12-06 00:06.18 [info     ] DDPG_20251205231906: epoch=145 step=145000 epoch=145 metrics={'time_sample_batch': 0.00480838680267334, 'time_algorithm_update': 0.01159150242805481, 'critic_loss': 1.565658507313676e+17, 'actor_loss': -1725943537.408, 'time_step': 0.016654826164245604, 'td_error': 1.249888937239647e+17, 'value_scale': 1654147920.469405, 'discounted_advantage': -2141586177.106663, 'initial_state': 2178418688.0, 'diff_eval': 113474.30846605197} step=145000
2025-12-06 00:06.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_145000.d3


Epoch 146/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.13it/s, critic_loss=1.64e+17, actor_loss=-1.77e+9]


2025-12-06 00:06.37 [info     ] DDPG_20251205231906: epoch=146 step=146000 epoch=146 metrics={'time_sample_batch': 0.004949132680892945, 'time_algorithm_update': 0.010873538017272949, 'critic_loss': 1.6416263954851955e+17, 'actor_loss': -1771501799.936, 'time_step': 0.016061347961425783, 'td_error': 1.315414072140666e+17, 'value_scale': 1696779039.9195306, 'discounted_advantage': -2195746168.982988, 'initial_state': 2234803200.0, 'diff_eval': 113474.30846605197} step=146000
2025-12-06 00:06.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_146000.d3


Epoch 147/200: 100%|██████████| 1000/1000 [00:21<00:00, 46.96it/s, critic_loss=1.75e+17, actor_loss=-1.82e+9]


2025-12-06 00:07.02 [info     ] DDPG_20251205231906: epoch=147 step=147000 epoch=147 metrics={'time_sample_batch': 0.005571860551834107, 'time_algorithm_update': 0.015062365531921387, 'critic_loss': 1.750159937906556e+17, 'actor_loss': -1815435918.208, 'time_step': 0.020919760704040526, 'td_error': 1.3828749758520683e+17, 'value_scale': 1738804617.5088015, 'discounted_advantage': -2262330660.520072, 'initial_state': 2290420992.0, 'diff_eval': 113474.30846605197} step=147000
2025-12-06 00:07.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_147000.d3


Epoch 148/200: 100%|██████████| 1000/1000 [00:18<00:00, 54.96it/s, critic_loss=1.82e+17, actor_loss=-1.86e+9]


2025-12-06 00:07.23 [info     ] DDPG_20251205231906: epoch=148 step=148000 epoch=148 metrics={'time_sample_batch': 0.005375029802322387, 'time_algorithm_update': 0.012160958766937256, 'critic_loss': 1.8209154055781168e+17, 'actor_loss': -1862604986.496, 'time_step': 0.01781372046470642, 'td_error': 1.4543082228972976e+17, 'value_scale': 1783420186.045264, 'discounted_advantage': -2312906636.747339, 'initial_state': 2349623296.0, 'diff_eval': 113474.30846605197} step=148000
2025-12-06 00:07.23 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_148000.d3


Epoch 149/200: 100%|██████████| 1000/1000 [00:17<00:00, 55.70it/s, critic_loss=1.92e+17, actor_loss=-1.91e+9]


2025-12-06 00:07.44 [info     ] DDPG_20251205231906: epoch=149 step=149000 epoch=149 metrics={'time_sample_batch': 0.0053410005569458005, 'time_algorithm_update': 0.011945711135864259, 'critic_loss': 1.9190344963536506e+17, 'actor_loss': -1908679238.912, 'time_step': 0.017566699743270874, 'td_error': 1.5289139673473453e+17, 'value_scale': 1828814184.341995, 'discounted_advantage': -2364836199.901057, 'initial_state': 2409892096.0, 'diff_eval': 113474.30846605197} step=149000
2025-12-06 00:07.44 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_149000.d3


Epoch 150/200: 100%|██████████| 1000/1000 [00:17<00:00, 56.34it/s, critic_loss=2.02e+17, actor_loss=-1.96e+9]


2025-12-06 00:08.05 [info     ] DDPG_20251205231906: epoch=150 step=150000 epoch=150 metrics={'time_sample_batch': 0.0051648647785186765, 'time_algorithm_update': 0.011961337804794312, 'critic_loss': 2.0202456450675296e+17, 'actor_loss': -1956239609.088, 'time_step': 0.017393412351608278, 'td_error': 1.6051927232824326e+17, 'value_scale': 1873215665.421626, 'discounted_advantage': -2429324019.404161, 'initial_state': 2468749056.0, 'diff_eval': 113474.30846605197} step=150000
2025-12-06 00:08.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_150000.d3


Epoch 151/200: 100%|██████████| 1000/1000 [00:17<00:00, 56.85it/s, critic_loss=2.15e+17, actor_loss=-2e+9]  


2025-12-06 00:08.25 [info     ] DDPG_20251205231906: epoch=151 step=151000 epoch=151 metrics={'time_sample_batch': 0.005232033729553223, 'time_algorithm_update': 0.011756022930145264, 'critic_loss': 2.1550822296653504e+17, 'actor_loss': -2001086855.68, 'time_step': 0.017262422561645507, 'td_error': 1.6845981509603405e+17, 'value_scale': 1918175592.2347023, 'discounted_advantage': -2495629366.53421, 'initial_state': 2528257536.0, 'diff_eval': 113474.30846605197} step=151000
2025-12-06 00:08.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_151000.d3


Epoch 152/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.58it/s, critic_loss=2.25e+17, actor_loss=-2.05e+9]


2025-12-06 00:08.45 [info     ] DDPG_20251205231906: epoch=152 step=152000 epoch=152 metrics={'time_sample_batch': 0.004878270149230957, 'time_algorithm_update': 0.011074666023254395, 'critic_loss': 2.2459824265976035e+17, 'actor_loss': -2050449526.912, 'time_step': 0.016202259302139282, 'td_error': 1.7669745951945104e+17, 'value_scale': 1964222431.31601, 'discounted_advantage': -2555980336.456559, 'initial_state': 2589304320.0, 'diff_eval': 113474.30846605197} step=152000
2025-12-06 00:08.45 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_152000.d3


Epoch 153/200: 100%|██████████| 1000/1000 [00:17<00:00, 55.64it/s, critic_loss=2.34e+17, actor_loss=-2.1e+9]


2025-12-06 00:09.06 [info     ] DDPG_20251205231906: epoch=153 step=153000 epoch=153 metrics={'time_sample_batch': 0.0052915472984313965, 'time_algorithm_update': 0.012042709589004517, 'critic_loss': 2.3380388341375984e+17, 'actor_loss': -2099633675.136, 'time_step': 0.017607420682907104, 'td_error': 1.8529307715533587e+17, 'value_scale': 2011498576.3889356, 'discounted_advantage': -2611996668.7625184, 'initial_state': 2652017152.0, 'diff_eval': 113474.30846605197} step=153000
2025-12-06 00:09.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_153000.d3


Epoch 154/200: 100%|██████████| 1000/1000 [00:17<00:00, 56.37it/s, critic_loss=2.44e+17, actor_loss=-2.15e+9]


2025-12-06 00:09.26 [info     ] DDPG_20251205231906: epoch=154 step=154000 epoch=154 metrics={'time_sample_batch': 0.00534865665435791, 'time_algorithm_update': 0.011790507316589356, 'critic_loss': 2.4389110682706406e+17, 'actor_loss': -2150141556.864, 'time_step': 0.01739345645904541, 'td_error': 1.943327048914455e+17, 'value_scale': 2059985720.3285835, 'discounted_advantage': -2670929957.7979555, 'initial_state': 2716423936.0, 'diff_eval': 113474.30846605197} step=154000
2025-12-06 00:09.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_154000.d3


Epoch 155/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.76it/s, critic_loss=2.55e+17, actor_loss=-2.2e+9]


2025-12-06 00:09.45 [info     ] DDPG_20251205231906: epoch=155 step=155000 epoch=155 metrics={'time_sample_batch': 0.004720086336135864, 'time_algorithm_update': 0.010892131805419921, 'critic_loss': 2.5531827414596426e+17, 'actor_loss': -2202472852.096, 'time_step': 0.01586913776397705, 'td_error': 2.038374853263066e+17, 'value_scale': 2109514155.8155909, 'discounted_advantage': -2734225135.082045, 'initial_state': 2782085888.0, 'diff_eval': 113474.30846605197} step=155000
2025-12-06 00:09.45 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_155000.d3


Epoch 156/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.06it/s, critic_loss=2.68e+17, actor_loss=-2.25e+9]


2025-12-06 00:10.06 [info     ] DDPG_20251205231906: epoch=156 step=156000 epoch=156 metrics={'time_sample_batch': 0.005125503540039062, 'time_algorithm_update': 0.011504237413406372, 'critic_loss': 2.6825012969018595e+17, 'actor_loss': -2254788274.176, 'time_step': 0.01689089107513428, 'td_error': 2.137670838102552e+17, 'value_scale': 2160278757.310981, 'discounted_advantage': -2795222375.687243, 'initial_state': 2849338112.0, 'diff_eval': 113474.30846605197} step=156000
2025-12-06 00:10.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_156000.d3


Epoch 157/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.10it/s, critic_loss=2.83e+17, actor_loss=-2.31e+9]


2025-12-06 00:10.26 [info     ] DDPG_20251205231906: epoch=157 step=157000 epoch=157 metrics={'time_sample_batch': 0.005239076852798462, 'time_algorithm_update': 0.011673217535018921, 'critic_loss': 2.8276832132919197e+17, 'actor_loss': -2306949621.504, 'time_step': 0.017180023431777953, 'td_error': 2.2387918377568912e+17, 'value_scale': 2210074525.5993295, 'discounted_advantage': -2867497969.237344, 'initial_state': 2915388672.0, 'diff_eval': 113474.30846605197} step=157000
2025-12-06 00:10.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_157000.d3


Epoch 158/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.18it/s, critic_loss=2.93e+17, actor_loss=-2.36e+9]


2025-12-06 00:10.46 [info     ] DDPG_20251205231906: epoch=158 step=158000 epoch=158 metrics={'time_sample_batch': 0.004988346576690674, 'time_algorithm_update': 0.011332446813583374, 'critic_loss': 2.926426893415822e+17, 'actor_loss': -2360843287.552, 'time_step': 0.016578051805496215, 'td_error': 2.3456642744460483e+17, 'value_scale': 2262398329.2808046, 'discounted_advantage': -2927404674.4341397, 'initial_state': 2984881152.0, 'diff_eval': 113474.30846605197} step=158000
2025-12-06 00:10.46 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_158000.d3


Epoch 159/200: 100%|██████████| 1000/1000 [00:18<00:00, 55.10it/s, critic_loss=3.11e+17, actor_loss=-2.42e+9]


2025-12-06 00:11.07 [info     ] DDPG_20251205231906: epoch=159 step=159000 epoch=159 metrics={'time_sample_batch': 0.0054298827648162845, 'time_algorithm_update': 0.012077113628387452, 'critic_loss': 3.116268208744185e+17, 'actor_loss': -2416970951.68, 'time_step': 0.01778855299949646, 'td_error': 2.4542059886717043e+17, 'value_scale': 2312961346.011735, 'discounted_advantage': -3008283820.9166064, 'initial_state': 3051731968.0, 'diff_eval': 113474.30846605197} step=159000
2025-12-06 00:11.07 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_159000.d3


Epoch 160/200: 100%|██████████| 1000/1000 [00:17<00:00, 55.60it/s, critic_loss=3.22e+17, actor_loss=-2.47e+9]


2025-12-06 00:11.28 [info     ] DDPG_20251205231906: epoch=160 step=160000 epoch=160 metrics={'time_sample_batch': 0.0054454820156097416, 'time_algorithm_update': 0.01193778419494629, 'critic_loss': 3.2257075126095315e+17, 'actor_loss': -2470272065.536, 'time_step': 0.017645967960357667, 'td_error': 2.5711383278468326e+17, 'value_scale': 2367796905.173512, 'discounted_advantage': -3068415271.2229743, 'initial_state': 3124588288.0, 'diff_eval': 113474.30846605197} step=160000
2025-12-06 00:11.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_160000.d3


Epoch 161/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.12it/s, critic_loss=3.39e+17, actor_loss=-2.53e+9]


2025-12-06 00:11.48 [info     ] DDPG_20251205231906: epoch=161 step=161000 epoch=161 metrics={'time_sample_batch': 0.004978211164474488, 'time_algorithm_update': 0.011361464977264405, 'critic_loss': 3.389321767512472e+17, 'actor_loss': -2528615186.944, 'time_step': 0.016598937273025513, 'td_error': 2.691108596568729e+17, 'value_scale': 2422173684.9488683, 'discounted_advantage': -3136945282.8895893, 'initial_state': 3196773888.0, 'diff_eval': 113474.30846605197} step=161000
2025-12-06 00:11.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_161000.d3


Epoch 162/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.40it/s, critic_loss=3.57e+17, actor_loss=-2.59e+9]


2025-12-06 00:12.08 [info     ] DDPG_20251205231906: epoch=162 step=162000 epoch=162 metrics={'time_sample_batch': 0.005164034366607666, 'time_algorithm_update': 0.011661908388137817, 'critic_loss': 3.5695627537564774e+17, 'actor_loss': -2585501780.736, 'time_step': 0.01708383274078369, 'td_error': 2.8142729325530752e+17, 'value_scale': 2476173086.2430844, 'discounted_advantage': -3214517769.3853593, 'initial_state': 3268321536.0, 'diff_eval': 113474.30846605197} step=162000
2025-12-06 00:12.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_162000.d3


Epoch 163/200: 100%|██████████| 1000/1000 [00:16<00:00, 58.97it/s, critic_loss=3.67e+17, actor_loss=-2.64e+9]


2025-12-06 00:12.28 [info     ] DDPG_20251205231906: epoch=163 step=163000 epoch=163 metrics={'time_sample_batch': 0.00499934458732605, 'time_algorithm_update': 0.011400572538375854, 'critic_loss': 3.670280849759603e+17, 'actor_loss': -2643830872.832, 'time_step': 0.016651036500930787, 'td_error': 2.9451927162177555e+17, 'value_scale': 2534321484.284996, 'discounted_advantage': -3265036224.7746964, 'initial_state': 3345904384.0, 'diff_eval': 113474.30846605197} step=163000
2025-12-06 00:12.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_163000.d3


Epoch 164/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.72it/s, critic_loss=3.86e+17, actor_loss=-2.7e+9]


2025-12-06 00:12.47 [info     ] DDPG_20251205231906: epoch=164 step=164000 epoch=164 metrics={'time_sample_batch': 0.004629058837890625, 'time_algorithm_update': 0.010778073787689208, 'critic_loss': 3.8612596748068064e+17, 'actor_loss': -2701723266.304, 'time_step': 0.015652889728546143, 'td_error': 3.076209505558633e+17, 'value_scale': 2589506432.402347, 'discounted_advantage': -3341543041.9372053, 'initial_state': 3419068160.0, 'diff_eval': 113474.30846605197} step=164000
2025-12-06 00:12.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_164000.d3


Epoch 165/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.59it/s, critic_loss=4.03e+17, actor_loss=-2.76e+9]


2025-12-06 00:13.06 [info     ] DDPG_20251205231906: epoch=165 step=165000 epoch=165 metrics={'time_sample_batch': 0.004950260162353516, 'time_algorithm_update': 0.011260271072387695, 'critic_loss': 4.025347703085403e+17, 'actor_loss': -2763551729.152, 'time_step': 0.016464256048202515, 'td_error': 3.2159517508921466e+17, 'value_scale': 2647651141.431685, 'discounted_advantage': -3411977833.731652, 'initial_state': 3496294400.0, 'diff_eval': 113474.30846605197} step=165000
2025-12-06 00:13.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_165000.d3


Epoch 166/200: 100%|██████████| 1000/1000 [00:17<00:00, 56.40it/s, critic_loss=4.23e+17, actor_loss=-2.82e+9]


2025-12-06 00:13.27 [info     ] DDPG_20251205231906: epoch=166 step=166000 epoch=166 metrics={'time_sample_batch': 0.005232925415039062, 'time_algorithm_update': 0.011886237859725953, 'critic_loss': 4.224986611394659e+17, 'actor_loss': -2824117440.512, 'time_step': 0.017380011081695557, 'td_error': 3.3582122090682816e+17, 'value_scale': 2704628418.950545, 'discounted_advantage': -3496587601.4655094, 'initial_state': 3571738624.0, 'diff_eval': 113474.30846605197} step=166000
2025-12-06 00:13.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_166000.d3


Epoch 167/200: 100%|██████████| 1000/1000 [00:18<00:00, 53.18it/s, critic_loss=4.4e+17, actor_loss=-2.88e+9]


2025-12-06 00:13.49 [info     ] DDPG_20251205231906: epoch=167 step=167000 epoch=167 metrics={'time_sample_batch': 0.005513305187225342, 'time_algorithm_update': 0.012575367212295532, 'critic_loss': 4.401521394767521e+17, 'actor_loss': -2884645607.68, 'time_step': 0.0183910653591156, 'td_error': 3.508172505083937e+17, 'value_scale': 2764174242.6018443, 'discounted_advantage': -3569750983.778592, 'initial_state': 3650852864.0, 'diff_eval': 113474.30846605197} step=167000
2025-12-06 00:13.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_167000.d3


Epoch 168/200: 100%|██████████| 1000/1000 [00:18<00:00, 54.83it/s, critic_loss=4.6e+17, actor_loss=-2.95e+9]


2025-12-06 00:14.10 [info     ] DDPG_20251205231906: epoch=168 step=168000 epoch=168 metrics={'time_sample_batch': 0.005391870021820068, 'time_algorithm_update': 0.01219012689590454, 'critic_loss': 4.5994472105313094e+17, 'actor_loss': -2949078354.944, 'time_step': 0.017863516569137573, 'td_error': 3.663125043671944e+17, 'value_scale': 2824343837.988265, 'discounted_advantage': -3647117646.037122, 'initial_state': 3730560768.0, 'diff_eval': 113474.30846605197} step=168000
2025-12-06 00:14.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_168000.d3


Epoch 169/200: 100%|██████████| 1000/1000 [00:17<00:00, 55.94it/s, critic_loss=4.86e+17, actor_loss=-3.01e+9]


2025-12-06 00:14.31 [info     ] DDPG_20251205231906: epoch=169 step=169000 epoch=169 metrics={'time_sample_batch': 0.005277620553970337, 'time_algorithm_update': 0.01195750069618225, 'critic_loss': 4.8616841138951475e+17, 'actor_loss': -3010329174.272, 'time_step': 0.017501035213470458, 'td_error': 3.819561172648478e+17, 'value_scale': 2882772585.7904444, 'discounted_advantage': -3738026059.262714, 'initial_state': 3807931392.0, 'diff_eval': 113474.30846605197} step=169000
2025-12-06 00:14.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_169000.d3


Epoch 170/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.64it/s, critic_loss=5e+17, actor_loss=-3.08e+9]  


2025-12-06 00:14.51 [info     ] DDPG_20251205231906: epoch=170 step=170000 epoch=170 metrics={'time_sample_batch': 0.00514206862449646, 'time_algorithm_update': 0.011597558975219727, 'critic_loss': 4.998989332737069e+17, 'actor_loss': -3076073473.28, 'time_step': 0.017002370834350584, 'td_error': 3.9878148980597114e+17, 'value_scale': 2945936457.0125732, 'discounted_advantage': -3806034830.834896, 'initial_state': 3891834624.0, 'diff_eval': 113474.30846605197} step=170000
2025-12-06 00:14.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_170000.d3


Epoch 171/200: 100%|██████████| 1000/1000 [00:19<00:00, 51.40it/s, critic_loss=5.22e+17, actor_loss=-3.14e+9]


2025-12-06 00:15.13 [info     ] DDPG_20251205231906: epoch=171 step=171000 epoch=171 metrics={'time_sample_batch': 0.005117715120315552, 'time_algorithm_update': 0.012074104070663453, 'critic_loss': 5.218007015288377e+17, 'actor_loss': -3140232852.224, 'time_step': 0.01746526789665222, 'td_error': 4.159467814227342e+17, 'value_scale': 3008616527.4903603, 'discounted_advantage': -3882662330.4122696, 'initial_state': 3974915328.0, 'diff_eval': 113474.30846605197} step=171000
2025-12-06 00:15.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_171000.d3


Epoch 172/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.09it/s, critic_loss=5.49e+17, actor_loss=-3.21e+9]


2025-12-06 00:15.33 [info     ] DDPG_20251205231906: epoch=172 step=172000 epoch=172 metrics={'time_sample_batch': 0.005080252647399902, 'time_algorithm_update': 0.010986732721328735, 'critic_loss': 5.487856692865196e+17, 'actor_loss': -3206805728.0, 'time_step': 0.016322999238967895, 'td_error': 4.3371598283275206e+17, 'value_scale': 3071604363.72171, 'discounted_advantage': -3969459433.5960493, 'initial_state': 4058685440.0, 'diff_eval': 113474.30846605197} step=172000
2025-12-06 00:15.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_172000.d3


Epoch 173/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.49it/s, critic_loss=5.73e+17, actor_loss=-3.27e+9]


2025-12-06 00:15.53 [info     ] DDPG_20251205231906: epoch=173 step=173000 epoch=173 metrics={'time_sample_batch': 0.005005483865737915, 'time_algorithm_update': 0.01095509648323059, 'critic_loss': 5.7397905481321254e+17, 'actor_loss': -3272980755.2, 'time_step': 0.01621929931640625, 'td_error': 4.5192719588968346e+17, 'value_scale': 3134391054.8197823, 'discounted_advantage': -4062198997.334333, 'initial_state': 4141795840.0, 'diff_eval': 113474.30846605197} step=173000
2025-12-06 00:15.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_173000.d3


Epoch 174/200: 100%|██████████| 1000/1000 [00:16<00:00, 62.02it/s, critic_loss=5.96e+17, actor_loss=-3.34e+9]


2025-12-06 00:16.12 [info     ] DDPG_20251205231906: epoch=174 step=174000 epoch=174 metrics={'time_sample_batch': 0.004709271907806396, 'time_algorithm_update': 0.010888770818710328, 'critic_loss': 5.962586611682577e+17, 'actor_loss': -3342373098.496, 'time_step': 0.01583644223213196, 'td_error': 4.7108641061937114e+17, 'value_scale': 3199650602.2196145, 'discounted_advantage': -4147780336.993427, 'initial_state': 4228401152.0, 'diff_eval': 113474.30846605197} step=174000
2025-12-06 00:16.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_174000.d3


Epoch 175/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.37it/s, critic_loss=6.17e+17, actor_loss=-3.41e+9]


2025-12-06 00:16.32 [info     ] DDPG_20251205231906: epoch=175 step=175000 epoch=175 metrics={'time_sample_batch': 0.004866526126861572, 'time_algorithm_update': 0.012001282453536988, 'critic_loss': 6.158346900984352e+17, 'actor_loss': -3409540755.712, 'time_step': 0.017116581439971922, 'td_error': 4.9138276317856384e+17, 'value_scale': 3268159826.3202014, 'discounted_advantage': -4225500230.566904, 'initial_state': 4319652864.0, 'diff_eval': 113474.30846605197} step=175000
2025-12-06 00:16.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_175000.d3


Epoch 176/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.70it/s, critic_loss=6.48e+17, actor_loss=-3.48e+9]


2025-12-06 00:16.51 [info     ] DDPG_20251205231906: epoch=176 step=176000 epoch=176 metrics={'time_sample_batch': 0.00467223334312439, 'time_algorithm_update': 0.010954889297485351, 'critic_loss': 6.479640698510461e+17, 'actor_loss': -3480458981.12, 'time_step': 0.015889639616012575, 'td_error': 5.1213288201499814e+17, 'value_scale': 3335675398.0888515, 'discounted_advantage': -4318111000.580288, 'initial_state': 4409051648.0, 'diff_eval': 113474.30846605197} step=176000
2025-12-06 00:16.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_176000.d3


Epoch 177/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.25it/s, critic_loss=6.72e+17, actor_loss=-3.56e+9]


2025-12-06 00:17.11 [info     ] DDPG_20251205231906: epoch=177 step=177000 epoch=177 metrics={'time_sample_batch': 0.004856995344161987, 'time_algorithm_update': 0.011155979633331299, 'critic_loss': 6.720387224656434e+17, 'actor_loss': -3556424829.184, 'time_step': 0.01629015064239502, 'td_error': 5.3372107807563226e+17, 'value_scale': 3405348251.9497066, 'discounted_advantage': -4401147861.829263, 'initial_state': 4501676544.0, 'diff_eval': 113474.30846605197} step=177000
2025-12-06 00:17.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_177000.d3


Epoch 178/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.54it/s, critic_loss=7.04e+17, actor_loss=-3.63e+9]


2025-12-06 00:17.30 [info     ] DDPG_20251205231906: epoch=178 step=178000 epoch=178 metrics={'time_sample_batch': 0.004605974197387695, 'time_algorithm_update': 0.011105154275894164, 'critic_loss': 7.041384262963414e+17, 'actor_loss': -3626071609.6, 'time_step': 0.015965424060821534, 'td_error': 5.5549326767993856e+17, 'value_scale': 3472944231.3763623, 'discounted_advantage': -4502826937.30091, 'initial_state': 4591285760.0, 'diff_eval': 113474.30846605197} step=178000
2025-12-06 00:17.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_178000.d3


Epoch 179/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.80it/s, critic_loss=7.3e+17, actor_loss=-3.7e+9] 


2025-12-06 00:17.49 [info     ] DDPG_20251205231906: epoch=179 step=179000 epoch=179 metrics={'time_sample_batch': 0.0051801857948303226, 'time_algorithm_update': 0.010767203569412232, 'critic_loss': 7.295594241302819e+17, 'actor_loss': -3699807969.792, 'time_step': 0.016173616886138917, 'td_error': 5.785375687429e+17, 'value_scale': 3544346471.349539, 'discounted_advantage': -4585453673.258891, 'initial_state': 4685988352.0, 'diff_eval': 113474.30846605197} step=179000
2025-12-06 00:17.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_179000.d3


Epoch 180/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.18it/s, critic_loss=7.59e+17, actor_loss=-3.77e+9]


2025-12-06 00:18.09 [info     ] DDPG_20251205231906: epoch=180 step=180000 epoch=180 metrics={'time_sample_batch': 0.004910952568054199, 'time_algorithm_update': 0.011163240671157837, 'critic_loss': 7.588129147065719e+17, 'actor_loss': -3773599700.736, 'time_step': 0.016311039209365844, 'td_error': 6.021878615766835e+17, 'value_scale': 3615610546.9371333, 'discounted_advantage': -4677817648.634494, 'initial_state': 4780450816.0, 'diff_eval': 113474.30846605197} step=180000
2025-12-06 00:18.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_180000.d3


Epoch 181/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.65it/s, critic_loss=7.89e+17, actor_loss=-3.85e+9]


2025-12-06 00:18.28 [info     ] DDPG_20251205231906: epoch=181 step=181000 epoch=181 metrics={'time_sample_batch': 0.004732852697372437, 'time_algorithm_update': 0.010949063062667847, 'critic_loss': 7.889703708292221e+17, 'actor_loss': -3848034111.744, 'time_step': 0.015926105499267578, 'td_error': 6.27142715281791e+17, 'value_scale': 3689760536.435876, 'discounted_advantage': -4764604780.030032, 'initial_state': 4878712832.0, 'diff_eval': 113474.30846605197} step=181000
2025-12-06 00:18.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_181000.d3


Epoch 182/200: 100%|██████████| 1000/1000 [00:16<00:00, 62.30it/s, critic_loss=8.16e+17, actor_loss=-3.93e+9]


2025-12-06 00:18.47 [info     ] DDPG_20251205231906: epoch=182 step=182000 epoch=182 metrics={'time_sample_batch': 0.004708171129226685, 'time_algorithm_update': 0.01081275773048401, 'critic_loss': 8.159188619965948e+17, 'actor_loss': -3930718967.296, 'time_step': 0.015761363983154298, 'td_error': 6.528479105743272e+17, 'value_scale': 3764601214.095557, 'discounted_advantage': -4854601765.089229, 'initial_state': 4978083328.0, 'diff_eval': 113474.30846605197} step=182000
2025-12-06 00:18.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_182000.d3


Epoch 183/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.45it/s, critic_loss=8.59e+17, actor_loss=-4.01e+9]


2025-12-06 00:19.06 [info     ] DDPG_20251205231906: epoch=183 step=183000 epoch=183 metrics={'time_sample_batch': 0.004747398376464844, 'time_algorithm_update': 0.010978976488113404, 'critic_loss': 8.594992658848584e+17, 'actor_loss': -4008165491.968, 'time_step': 0.015983668088912963, 'td_error': 6.78973558831994e+17, 'value_scale': 3837671595.8021793, 'discounted_advantage': -4968671200.262963, 'initial_state': 5074808832.0, 'diff_eval': 113474.30846605197} step=183000
2025-12-06 00:19.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_183000.d3


Epoch 184/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.95it/s, critic_loss=8.93e+17, actor_loss=-4.08e+9]


2025-12-06 00:19.25 [info     ] DDPG_20251205231906: epoch=184 step=184000 epoch=184 metrics={'time_sample_batch': 0.004681986570358276, 'time_algorithm_update': 0.01091748023033142, 'critic_loss': 8.921734471894015e+17, 'actor_loss': -4084888688.64, 'time_step': 0.01584920597076416, 'td_error': 7.062653662235485e+17, 'value_scale': 3914062918.598491, 'discounted_advantage': -5061506062.76768, 'initial_state': 5176228352.0, 'diff_eval': 113474.30846605197} step=184000
2025-12-06 00:19.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_184000.d3


Epoch 185/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.94it/s, critic_loss=9.26e+17, actor_loss=-4.17e+9]


2025-12-06 00:19.44 [info     ] DDPG_20251205231906: epoch=185 step=185000 epoch=185 metrics={'time_sample_batch': 0.004818952322006226, 'time_algorithm_update': 0.011050405263900756, 'critic_loss': 9.262880672039735e+17, 'actor_loss': -4166674993.664, 'time_step': 0.016117023706436157, 'td_error': 7.343256975018918e+17, 'value_scale': 3990496442.0184407, 'discounted_advantage': -5162086137.431213, 'initial_state': 5277826560.0, 'diff_eval': 113474.30846605197} step=185000
2025-12-06 00:19.44 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_185000.d3


Epoch 186/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.43it/s, critic_loss=9.56e+17, actor_loss=-4.25e+9]


2025-12-06 00:20.03 [info     ] DDPG_20251205231906: epoch=186 step=186000 epoch=186 metrics={'time_sample_batch': 0.004720154523849487, 'time_algorithm_update': 0.01103020167350769, 'critic_loss': 9.557612715052147e+17, 'actor_loss': -4249527665.408, 'time_step': 0.01598598003387451, 'td_error': 7.639207111215425e+17, 'value_scale': 4070378665.6295056, 'discounted_advantage': -5252623208.546558, 'initial_state': 5384131584.0, 'diff_eval': 113474.30846605197} step=186000
2025-12-06 00:20.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_186000.d3


Epoch 187/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.02it/s, critic_loss=9.94e+17, actor_loss=-4.33e+9]


2025-12-06 00:20.22 [info     ] DDPG_20251205231906: epoch=187 step=187000 epoch=187 metrics={'time_sample_batch': 0.004820722103118897, 'time_algorithm_update': 0.011011670589447022, 'critic_loss': 9.937463797075002e+17, 'actor_loss': -4332383936.768, 'time_step': 0.01608359479904175, 'td_error': 7.941043452177728e+17, 'value_scale': 4150111612.8616934, 'discounted_advantage': -5347503609.756654, 'initial_state': 5490190848.0, 'diff_eval': 113474.30846605197} step=187000
2025-12-06 00:20.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_187000.d3


Epoch 188/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.85it/s, critic_loss=1.03e+18, actor_loss=-4.41e+9]


2025-12-06 00:20.41 [info     ] DDPG_20251205231906: epoch=188 step=188000 epoch=188 metrics={'time_sample_batch': 0.004552865028381347, 'time_algorithm_update': 0.010810821533203126, 'critic_loss': 1.0316077165043222e+18, 'actor_loss': -4414295618.56, 'time_step': 0.015608092784881593, 'td_error': 8.24628361666558e+17, 'value_scale': 4229183984.25482, 'discounted_advantage': -5443725684.016183, 'initial_state': 5595327488.0, 'diff_eval': 113474.30846605197} step=188000
2025-12-06 00:20.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_188000.d3


Epoch 189/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.31it/s, critic_loss=1.07e+18, actor_loss=-4.5e+9]


2025-12-06 00:21.00 [info     ] DDPG_20251205231906: epoch=189 step=189000 epoch=189 metrics={'time_sample_batch': 0.004791317224502564, 'time_algorithm_update': 0.010984478950500488, 'critic_loss': 1.071880068102248e+18, 'actor_loss': -4498714096.64, 'time_step': 0.01601701283454895, 'td_error': 8.561905257189404e+17, 'value_scale': 4308743336.207879, 'discounted_advantage': -5549160499.760774, 'initial_state': 5700992512.0, 'diff_eval': 113474.30846605197} step=189000
2025-12-06 00:21.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_189000.d3


Epoch 190/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.51it/s, critic_loss=1.12e+18, actor_loss=-4.58e+9]


2025-12-06 00:21.19 [info     ] DDPG_20251205231906: epoch=190 step=190000 epoch=190 metrics={'time_sample_batch': 0.0047918381690979, 'time_algorithm_update': 0.010927048921585083, 'critic_loss': 1.1191642064580499e+18, 'actor_loss': -4582939955.2, 'time_step': 0.01596374726295471, 'td_error': 8.89064966120695e+17, 'value_scale': 4389834886.893546, 'discounted_advantage': -5660997237.6101055, 'initial_state': 5808841216.0, 'diff_eval': 113474.30846605197} step=190000
2025-12-06 00:21.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_190000.d3


Epoch 191/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.09it/s, critic_loss=1.15e+18, actor_loss=-4.67e+9]


2025-12-06 00:21.38 [info     ] DDPG_20251205231906: epoch=191 step=191000 epoch=191 metrics={'time_sample_batch': 0.004773852586746216, 'time_algorithm_update': 0.011028233289718628, 'critic_loss': 1.1528792878958258e+18, 'actor_loss': -4669232844.8, 'time_step': 0.01605815076828003, 'td_error': 9.232155288563657e+17, 'value_scale': 4473841615.074602, 'discounted_advantage': -5752344121.605364, 'initial_state': 5920253440.0, 'diff_eval': 113474.30846605197} step=191000
2025-12-06 00:21.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_191000.d3


Epoch 192/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.35it/s, critic_loss=1.21e+18, actor_loss=-4.76e+9]


2025-12-06 00:21.57 [info     ] DDPG_20251205231906: epoch=192 step=192000 epoch=192 metrics={'time_sample_batch': 0.004545047283172608, 'time_algorithm_update': 0.010665524005889892, 'critic_loss': 1.2080492302299456e+18, 'actor_loss': -4755308818.432, 'time_step': 0.015481574058532715, 'td_error': 9.57649909340682e+17, 'value_scale': 4555030551.255658, 'discounted_advantage': -5873080513.826099, 'initial_state': 6027738624.0, 'diff_eval': 113474.30846605197} step=192000
2025-12-06 00:21.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_192000.d3


Epoch 193/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.27it/s, critic_loss=1.25e+18, actor_loss=-4.84e+9]


2025-12-06 00:22.16 [info     ] DDPG_20251205231906: epoch=193 step=193000 epoch=193 metrics={'time_sample_batch': 0.0048795690536499025, 'time_algorithm_update': 0.011167728662490845, 'critic_loss': 1.2524476570253793e+18, 'actor_loss': -4844306653.184, 'time_step': 0.016286586761474608, 'td_error': 9.934922199649172e+17, 'value_scale': 4638755174.786253, 'discounted_advantage': -5984996763.648958, 'initial_state': 6138782208.0, 'diff_eval': 113474.30846605197} step=193000
2025-12-06 00:22.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_193000.d3


Epoch 194/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.16it/s, critic_loss=1.31e+18, actor_loss=-4.93e+9]


2025-12-06 00:22.36 [info     ] DDPG_20251205231906: epoch=194 step=194000 epoch=194 metrics={'time_sample_batch': 0.004799105167388916, 'time_algorithm_update': 0.01098682689666748, 'critic_loss': 1.3074779705911813e+18, 'actor_loss': -4929016295.424, 'time_step': 0.016039243936538695, 'td_error': 1.030837346273911e+18, 'value_scale': 4724720190.605197, 'discounted_advantage': -6093675042.759155, 'initial_state': 6252865536.0, 'diff_eval': 113474.30846605197} step=194000
2025-12-06 00:22.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_194000.d3


Epoch 195/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.61it/s, critic_loss=1.34e+18, actor_loss=-5.02e+9]


2025-12-06 00:22.55 [info     ] DDPG_20251205231906: epoch=195 step=195000 epoch=195 metrics={'time_sample_batch': 0.004785715103149414, 'time_algorithm_update': 0.010917199850082398, 'critic_loss': 1.3425105098136637e+18, 'actor_loss': -5023538538.496, 'time_step': 0.015936304569244386, 'td_error': 1.0696719549899359e+18, 'value_scale': 4812858567.778709, 'discounted_advantage': -6198594370.828093, 'initial_state': 6369942016.0, 'diff_eval': 113474.30846605197} step=195000
2025-12-06 00:22.55 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_195000.d3


Epoch 196/200: 100%|██████████| 1000/1000 [00:16<00:00, 62.19it/s, critic_loss=1.4e+18, actor_loss=-5.11e+9]


2025-12-06 00:23.13 [info     ] DDPG_20251205231906: epoch=196 step=196000 epoch=196 metrics={'time_sample_batch': 0.004682233572006225, 'time_algorithm_update': 0.010868134498596192, 'critic_loss': 1.4004903826179868e+18, 'actor_loss': -5113438308.352, 'time_step': 0.01579187059402466, 'td_error': 1.1100012982475149e+18, 'value_scale': 4901934983.26907, 'discounted_advantage': -6317002031.068826, 'initial_state': 6488277504.0, 'diff_eval': 113474.30846605197} step=196000
2025-12-06 00:23.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_196000.d3


Epoch 197/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.07it/s, critic_loss=1.44e+18, actor_loss=-5.21e+9]


2025-12-06 00:23.33 [info     ] DDPG_20251205231906: epoch=197 step=197000 epoch=197 metrics={'time_sample_batch': 0.004764461755752563, 'time_algorithm_update': 0.011043700456619263, 'critic_loss': 1.440205635897734e+18, 'actor_loss': -5212112050.688, 'time_step': 0.016060415506362915, 'td_error': 1.1512041850839406e+18, 'value_scale': 4992435639.87259, 'discounted_advantage': -6418070498.243651, 'initial_state': 6608702464.0, 'diff_eval': 113474.30846605197} step=197000
2025-12-06 00:23.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_197000.d3


Epoch 198/200: 100%|██████████| 1000/1000 [00:15<00:00, 64.84it/s, critic_loss=1.5e+18, actor_loss=-5.31e+9]


2025-12-06 00:23.51 [info     ] DDPG_20251205231906: epoch=198 step=198000 epoch=198 metrics={'time_sample_batch': 0.004411792516708374, 'time_algorithm_update': 0.010477240324020386, 'critic_loss': 1.5035710347149647e+18, 'actor_loss': -5305702806.528, 'time_step': 0.015124499082565308, 'td_error': 1.1928778184070072e+18, 'value_scale': 5080734771.983235, 'discounted_advantage': -6548532057.942452, 'initial_state': 6726105088.0, 'diff_eval': 113474.30846605197} step=198000
2025-12-06 00:23.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_198000.d3


Epoch 199/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.65it/s, critic_loss=1.56e+18, actor_loss=-5.4e+9]


2025-12-06 00:24.10 [info     ] DDPG_20251205231906: epoch=199 step=199000 epoch=199 metrics={'time_sample_batch': 0.0047481367588043215, 'time_algorithm_update': 0.011120374917984009, 'critic_loss': 1.5634871989666893e+18, 'actor_loss': -5399084899.328, 'time_step': 0.016155123949050904, 'td_error': 1.2364206764320717e+18, 'value_scale': 5171958292.573344, 'discounted_advantage': -6668849938.343526, 'initial_state': 6847223808.0, 'diff_eval': 113474.30846605197} step=199000
2025-12-06 00:24.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_199000.d3


Epoch 200/200: 100%|██████████| 1000/1000 [00:16<00:00, 62.48it/s, critic_loss=1.6e+18, actor_loss=-5.49e+9]


2025-12-06 00:24.29 [info     ] DDPG_20251205231906: epoch=200 step=200000 epoch=200 metrics={'time_sample_batch': 0.004709667205810547, 'time_algorithm_update': 0.010748018980026246, 'critic_loss': 1.6004818508425178e+18, 'actor_loss': -5495099875.328, 'time_step': 0.015706833124160765, 'td_error': 1.2816157151328415e+18, 'value_scale': 5266166533.445096, 'discounted_advantage': -6772707400.912043, 'initial_state': 6972203520.0, 'diff_eval': 113474.30846605197} step=200000
2025-12-06 00:24.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\DDPG_20251205231906\model_200000.d3
Training model:  TD3
2025-12-06 00:24.29 [info     ] dataset info                   dataset_info=DatasetInfo(observation_signature=Signature(dtype=[dtype('float64')], shape=[(7,)]), action_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.CONTINUOUS: 1>, action_size=1)
2025-12-06 00:24.29 [debug    ]

Epoch 1/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.79it/s, critic_loss=0.21, actor_loss=-1.13] 


2025-12-06 00:24.49 [info     ] TD3_20251206002429: epoch=1 step=1000 epoch=1 metrics={'time_sample_batch': 0.004792696952819824, 'time_algorithm_update': 0.011691516876220703, 'critic_loss': 0.2113052353784442, 'actor_loss': -1.1343899206221104, 'time_step': 0.016718381881713866, 'td_error': 0.769246707958956, 'value_scale': 1.9692260790499176, 'discounted_advantage': -3.102265307468096, 'initial_state': 1.9812402725219727, 'diff_eval': 113460.41205964643} step=1000
2025-12-06 00:24.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_1000.d3


Epoch 2/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.18it/s, critic_loss=0.492, actor_loss=-2.65]


2025-12-06 00:25.10 [info     ] TD3_20251206002429: epoch=2 step=2000 epoch=2 metrics={'time_sample_batch': 0.0049387047290802, 'time_algorithm_update': 0.011968345880508423, 'critic_loss': 0.4931291757375002, 'actor_loss': -2.6543768036365507, 'time_step': 0.017166425466537474, 'td_error': 1.4685386684060369, 'value_scale': 3.9087106833157925, 'discounted_advantage': -7.075323951557852, 'initial_state': 3.7243645191192627, 'diff_eval': 113473.66431932857} step=2000
2025-12-06 00:25.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_2000.d3


Epoch 3/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.17it/s, critic_loss=0.891, actor_loss=-4.62]


2025-12-06 00:25.31 [info     ] TD3_20251206002429: epoch=3 step=3000 epoch=3 metrics={'time_sample_batch': 0.004645020723342896, 'time_algorithm_update': 0.012000163555145264, 'critic_loss': 0.8918219711780548, 'actor_loss': -4.629995553016663, 'time_step': 0.01688051176071167, 'td_error': 2.4318565424452787, 'value_scale': 6.073754997856834, 'discounted_advantage': -11.897940630176112, 'initial_state': 5.433687686920166, 'diff_eval': 113474.18666801868} step=3000
2025-12-06 00:25.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_3000.d3


Epoch 4/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.22it/s, critic_loss=1.6, actor_loss=-7.51]


2025-12-06 00:25.52 [info     ] TD3_20251206002429: epoch=4 step=4000 epoch=4 metrics={'time_sample_batch': 0.004771240472793579, 'time_algorithm_update': 0.011827944993972779, 'critic_loss': 1.603705147087574, 'actor_loss': -7.522799094200134, 'time_step': 0.016854914903640748, 'td_error': 4.760497511255647, 'value_scale': 9.649372371486626, 'discounted_advantage': -19.58894917315792, 'initial_state': 6.621946811676025, 'diff_eval': 113474.27538162065} step=4000
2025-12-06 00:25.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_4000.d3


Epoch 5/200: 100%|██████████| 1000/1000 [00:18<00:00, 53.92it/s, critic_loss=2.51, actor_loss=-11.4]


2025-12-06 00:26.14 [info     ] TD3_20251206002429: epoch=5 step=5000 epoch=5 metrics={'time_sample_batch': 0.005061552047729492, 'time_algorithm_update': 0.012883593082427978, 'critic_loss': 2.510604332983494, 'actor_loss': -11.366932428359986, 'time_step': 0.018204780340194703, 'td_error': 8.620521687612344, 'value_scale': 14.048755546402552, 'discounted_advantage': -29.2998334047433, 'initial_state': 8.539236068725586, 'diff_eval': 113474.3083252567} step=5000
2025-12-06 00:26.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_5000.d3


Epoch 6/200: 100%|██████████| 1000/1000 [00:18<00:00, 55.43it/s, critic_loss=3.86, actor_loss=-15.9]


2025-12-06 00:26.36 [info     ] TD3_20251206002429: epoch=6 step=6000 epoch=6 metrics={'time_sample_batch': 0.005061893463134765, 'time_algorithm_update': 0.012334694623947144, 'critic_loss': 3.8567848814725876, 'actor_loss': -15.893497167587281, 'time_step': 0.017674629926681517, 'td_error': 10.945203304676195, 'value_scale': 19.47771578175324, 'discounted_advantage': -36.42567512986214, 'initial_state': 12.161785125732422, 'diff_eval': 113474.30845295473} step=6000
2025-12-06 00:26.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_6000.d3


Epoch 7/200: 100%|██████████| 1000/1000 [00:17<00:00, 56.17it/s, critic_loss=5.53, actor_loss=-21.1]


2025-12-06 00:26.57 [info     ] TD3_20251206002429: epoch=7 step=7000 epoch=7 metrics={'time_sample_batch': 0.004970561027526856, 'time_algorithm_update': 0.012248953104019165, 'critic_loss': 5.529126428604126, 'actor_loss': -21.097164234161376, 'time_step': 0.017472838401794432, 'td_error': 13.270788206205678, 'value_scale': 25.63311118916714, 'discounted_advantage': -45.64384842141019, 'initial_state': 16.514419555664062, 'diff_eval': 113474.30846605197} step=7000
2025-12-06 00:26.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_7000.d3


Epoch 8/200: 100%|██████████| 1000/1000 [00:17<00:00, 56.78it/s, critic_loss=8.11, actor_loss=-27.3]


2025-12-06 00:27.18 [info     ] TD3_20251206002429: epoch=8 step=8000 epoch=8 metrics={'time_sample_batch': 0.004852453231811524, 'time_algorithm_update': 0.012198006868362426, 'critic_loss': 8.145464981079101, 'actor_loss': -27.296839149475097, 'time_step': 0.017300368309020996, 'td_error': 18.93560546481653, 'value_scale': 33.26587607599764, 'discounted_advantage': -57.676765807152705, 'initial_state': 20.17633819580078, 'diff_eval': 113474.30846605197} step=8000
2025-12-06 00:27.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_8000.d3


Epoch 9/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.74it/s, critic_loss=11.2, actor_loss=-34.3]


2025-12-06 00:27.39 [info     ] TD3_20251206002429: epoch=9 step=9000 epoch=9 metrics={'time_sample_batch': 0.004871643543243408, 'time_algorithm_update': 0.011859699726104737, 'critic_loss': 11.199939573287963, 'actor_loss': -34.312904029846194, 'time_step': 0.016988826990127562, 'td_error': 23.673027190327026, 'value_scale': 42.04772100439615, 'discounted_advantage': -70.45321635627495, 'initial_state': 26.532915115356445, 'diff_eval': 113474.30843330889} step=9000
2025-12-06 00:27.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_9000.d3


Epoch 10/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.83it/s, critic_loss=15.3, actor_loss=-41.8]


2025-12-06 00:27.59 [info     ] TD3_20251206002429: epoch=10 step=10000 epoch=10 metrics={'time_sample_batch': 0.00461119556427002, 'time_algorithm_update': 0.012155456066131592, 'critic_loss': 15.272611141204834, 'actor_loss': -41.83993294525146, 'time_step': 0.016998785972595215, 'td_error': 26.639275851739633, 'value_scale': 50.99426198305448, 'discounted_advantage': -81.37801631502347, 'initial_state': 32.45392608642578, 'diff_eval': 113474.30691894122} step=10000
2025-12-06 00:28.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_10000.d3


Epoch 11/200: 100%|██████████| 1000/1000 [00:19<00:00, 52.25it/s, critic_loss=18.7, actor_loss=-49.9]


2025-12-06 00:28.22 [info     ] TD3_20251206002429: epoch=11 step=11000 epoch=11 metrics={'time_sample_batch': 0.004831925630569458, 'time_algorithm_update': 0.013735304594039916, 'critic_loss': 18.797028986930847, 'actor_loss': -49.94378914642334, 'time_step': 0.01881797981262207, 'td_error': 27.39845579599828, 'value_scale': 60.25243177877588, 'discounted_advantage': -87.51582108059813, 'initial_state': 38.90559005737305, 'diff_eval': 113474.30529897711} step=11000
2025-12-06 00:28.23 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_11000.d3


Epoch 12/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.08it/s, critic_loss=21.4, actor_loss=-60.8]


2025-12-06 00:28.43 [info     ] TD3_20251206002429: epoch=12 step=12000 epoch=12 metrics={'time_sample_batch': 0.004628854036331177, 'time_algorithm_update': 0.011737390995025635, 'critic_loss': 21.422284126758576, 'actor_loss': -60.825412002563475, 'time_step': 0.016612487077713012, 'td_error': 42.20487454908536, 'value_scale': 73.01605792017615, 'discounted_advantage': -101.88183393003467, 'initial_state': 51.35517883300781, 'diff_eval': 98039.35186566581} step=12000
2025-12-06 00:28.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_12000.d3


Epoch 13/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.55it/s, critic_loss=30.5, actor_loss=-74.7]


2025-12-06 00:29.03 [info     ] TD3_20251206002429: epoch=13 step=13000 epoch=13 metrics={'time_sample_batch': 0.004608095407485962, 'time_algorithm_update': 0.011624974727630615, 'critic_loss': 30.630581300735475, 'actor_loss': -74.79430270385743, 'time_step': 0.016483726739883423, 'td_error': 48.373336252375786, 'value_scale': 85.71650058845462, 'discounted_advantage': -125.58312624239649, 'initial_state': 63.29106903076172, 'diff_eval': 103876.75798515907} step=13000
2025-12-06 00:29.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_13000.d3


Epoch 14/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.83it/s, critic_loss=50.1, actor_loss=-88.8]


2025-12-06 00:29.24 [info     ] TD3_20251206002429: epoch=14 step=14000 epoch=14 metrics={'time_sample_batch': 0.004871004343032837, 'time_algorithm_update': 0.011831368923187256, 'critic_loss': 50.05932580089569, 'actor_loss': -88.86163606262207, 'time_step': 0.01696370530128479, 'td_error': 80.08538203008649, 'value_scale': 101.3632431749726, 'discounted_advantage': -157.96873928833028, 'initial_state': 77.46055603027344, 'diff_eval': 102993.10945932326} step=14000
2025-12-06 00:29.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_14000.d3


Epoch 15/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.80it/s, critic_loss=58.7, actor_loss=-104]


2025-12-06 00:29.44 [info     ] TD3_20251206002429: epoch=15 step=15000 epoch=15 metrics={'time_sample_batch': 0.004857970476150513, 'time_algorithm_update': 0.011883546590805054, 'critic_loss': 58.51320399188995, 'actor_loss': -104.04685771179199, 'time_step': 0.016982437133789063, 'td_error': 72.17123561227392, 'value_scale': 114.62544186377066, 'discounted_advantage': -164.84483872386167, 'initial_state': 92.00687408447266, 'diff_eval': 100102.24507477705} step=15000
2025-12-06 00:29.44 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_15000.d3


Epoch 16/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.31it/s, critic_loss=76.6, actor_loss=-119]


2025-12-06 00:30.05 [info     ] TD3_20251206002429: epoch=16 step=16000 epoch=16 metrics={'time_sample_batch': 0.004719654083251953, 'time_algorithm_update': 0.011597368955612183, 'critic_loss': 76.57250823020935, 'actor_loss': -119.31534739685058, 'time_step': 0.016553321361541747, 'td_error': 93.00993079300392, 'value_scale': 130.1003597464166, 'discounted_advantage': -187.94267969056298, 'initial_state': 103.46913146972656, 'diff_eval': 104616.35244103479} step=16000
2025-12-06 00:30.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_16000.d3


Epoch 17/200: 100%|██████████| 1000/1000 [00:17<00:00, 56.54it/s, critic_loss=97.1, actor_loss=-135]


2025-12-06 00:30.26 [info     ] TD3_20251206002429: epoch=17 step=17000 epoch=17 metrics={'time_sample_batch': 0.004941129207611084, 'time_algorithm_update': 0.012148899793624877, 'critic_loss': 97.15751916885375, 'actor_loss': -135.02981663513182, 'time_step': 0.017357248067855834, 'td_error': 108.76594542551963, 'value_scale': 143.911946439783, 'discounted_advantage': -210.48076131956833, 'initial_state': 113.99224853515625, 'diff_eval': 102294.60679474132} step=17000
2025-12-06 00:30.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_17000.d3


Epoch 18/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.55it/s, critic_loss=119, actor_loss=-151]


2025-12-06 00:30.46 [info     ] TD3_20251206002429: epoch=18 step=18000 epoch=18 metrics={'time_sample_batch': 0.004695772171020508, 'time_algorithm_update': 0.011526034116744995, 'critic_loss': 119.50326076126099, 'actor_loss': -151.26251391601562, 'time_step': 0.016471914768218993, 'td_error': 134.8615115947906, 'value_scale': 161.35245689782192, 'discounted_advantage': -234.76851505417463, 'initial_state': 126.0050277709961, 'diff_eval': 104976.55319441676} step=18000
2025-12-06 00:30.46 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_18000.d3


Epoch 19/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.21it/s, critic_loss=185, actor_loss=-167]


2025-12-06 00:31.06 [info     ] TD3_20251206002429: epoch=19 step=19000 epoch=19 metrics={'time_sample_batch': 0.004749865055084229, 'time_algorithm_update': 0.011594134092330932, 'critic_loss': 185.02696301841735, 'actor_loss': -167.500176361084, 'time_step': 0.016584277868270873, 'td_error': 160.4886510963214, 'value_scale': 178.14727824771495, 'discounted_advantage': -254.8479092936103, 'initial_state': 137.96437072753906, 'diff_eval': 104977.40528015485} step=19000
2025-12-06 00:31.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_19000.d3


Epoch 20/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.05it/s, critic_loss=216, actor_loss=-186]


2025-12-06 00:31.27 [info     ] TD3_20251206002429: epoch=20 step=20000 epoch=20 metrics={'time_sample_batch': 0.004768415689468384, 'time_algorithm_update': 0.011619289875030518, 'critic_loss': 216.2630022087097, 'actor_loss': -185.96473797607422, 'time_step': 0.01662584972381592, 'td_error': 178.55739824921886, 'value_scale': 197.99568446144175, 'discounted_advantage': -279.7914875453838, 'initial_state': 153.58181762695312, 'diff_eval': 105215.15978831418} step=20000
2025-12-06 00:31.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_20000.d3


Epoch 21/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.68it/s, critic_loss=181, actor_loss=-206]


2025-12-06 00:31.47 [info     ] TD3_20251206002429: epoch=21 step=21000 epoch=21 metrics={'time_sample_batch': 0.004646619081497192, 'time_algorithm_update': 0.011555819749832153, 'critic_loss': 180.9942029132843, 'actor_loss': -206.1497273864746, 'time_step': 0.016449905157089235, 'td_error': 195.85872823038588, 'value_scale': 220.61298612097383, 'discounted_advantage': -302.5005641473034, 'initial_state': 174.2020721435547, 'diff_eval': 105373.89365723744} step=21000
2025-12-06 00:31.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_21000.d3


Epoch 22/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.70it/s, critic_loss=199, actor_loss=-226]


2025-12-06 00:32.06 [info     ] TD3_20251206002429: epoch=22 step=22000 epoch=22 metrics={'time_sample_batch': 0.004397831678390503, 'time_algorithm_update': 0.010997403860092164, 'critic_loss': 199.8807349281311, 'actor_loss': -225.78182583618164, 'time_step': 0.015649059057235716, 'td_error': 207.17795478779595, 'value_scale': 240.62820705723982, 'discounted_advantage': -328.3851750928314, 'initial_state': 190.49285888671875, 'diff_eval': 103739.26188031134} step=22000
2025-12-06 00:32.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_22000.d3


Epoch 23/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.28it/s, critic_loss=179, actor_loss=-248]


2025-12-06 00:32.27 [info     ] TD3_20251206002429: epoch=23 step=23000 epoch=23 metrics={'time_sample_batch': 0.004723575353622437, 'time_algorithm_update': 0.012153651714324951, 'critic_loss': 177.944179145813, 'actor_loss': -248.44969918823242, 'time_step': 0.017138304471969606, 'td_error': 216.6926626612363, 'value_scale': 266.28821236289855, 'discounted_advantage': -356.51297234707124, 'initial_state': 222.23504638671875, 'diff_eval': 113474.30846605197} step=23000
2025-12-06 00:32.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_23000.d3


Epoch 24/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.06it/s, critic_loss=59.2, actor_loss=-274]


2025-12-06 00:32.47 [info     ] TD3_20251206002429: epoch=24 step=24000 epoch=24 metrics={'time_sample_batch': 0.004747230291366577, 'time_algorithm_update': 0.011633525848388672, 'critic_loss': 59.341769241333004, 'actor_loss': -273.74415991210935, 'time_step': 0.016622987508773803, 'td_error': 216.14365321003706, 'value_scale': 292.00129649185476, 'discounted_advantage': -366.4719522318397, 'initial_state': 245.8232879638672, 'diff_eval': 113474.30846605197} step=24000
2025-12-06 00:32.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_24000.d3


Epoch 25/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.30it/s, critic_loss=60.8, actor_loss=-298]


2025-12-06 00:33.08 [info     ] TD3_20251206002429: epoch=25 step=25000 epoch=25 metrics={'time_sample_batch': 0.004822919368743897, 'time_algorithm_update': 0.01176444125175476, 'critic_loss': 60.795069156646726, 'actor_loss': -298.5748271484375, 'time_step': 0.016839884996414183, 'td_error': 272.2640859963751, 'value_scale': 318.9212148495312, 'discounted_advantage': -421.2920241128386, 'initial_state': 268.83221435546875, 'diff_eval': 113474.30846605197} step=25000
2025-12-06 00:33.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_25000.d3


Epoch 26/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.70it/s, critic_loss=70.4, actor_loss=-327]


2025-12-06 00:33.28 [info     ] TD3_20251206002429: epoch=26 step=26000 epoch=26 metrics={'time_sample_batch': 0.004663862705230713, 'time_algorithm_update': 0.011524356365203858, 'critic_loss': 70.40335931968688, 'actor_loss': -326.6426920776367, 'time_step': 0.01643198847770691, 'td_error': 292.2438041570852, 'value_scale': 347.4701434868479, 'discounted_advantage': -452.97684645260256, 'initial_state': 292.0541687011719, 'diff_eval': 113474.30846605197} step=26000
2025-12-06 00:33.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_26000.d3


Epoch 27/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.71it/s, critic_loss=77.8, actor_loss=-357]


2025-12-06 00:33.48 [info     ] TD3_20251206002429: epoch=27 step=27000 epoch=27 metrics={'time_sample_batch': 0.004812734365463257, 'time_algorithm_update': 0.011670601844787598, 'critic_loss': 77.85108555221558, 'actor_loss': -357.1461452026367, 'time_step': 0.016733219861984252, 'td_error': 378.33556513075246, 'value_scale': 381.1453977010876, 'discounted_advantage': -501.6445149824599, 'initial_state': 320.79766845703125, 'diff_eval': 113474.30846605197} step=27000
2025-12-06 00:33.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_27000.d3


Epoch 28/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.55it/s, critic_loss=87.6, actor_loss=-390]


2025-12-06 00:34.09 [info     ] TD3_20251206002429: epoch=28 step=28000 epoch=28 metrics={'time_sample_batch': 0.004787086248397827, 'time_algorithm_update': 0.01173693299293518, 'critic_loss': 87.68558131790161, 'actor_loss': -389.8155874633789, 'time_step': 0.016776005029678343, 'td_error': 381.3930072852978, 'value_scale': 413.3948984713686, 'discounted_advantage': -536.2717694683953, 'initial_state': 347.55120849609375, 'diff_eval': 113474.30846605197} step=28000
2025-12-06 00:34.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_28000.d3


Epoch 29/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.69it/s, critic_loss=96.9, actor_loss=-420]


2025-12-06 00:34.29 [info     ] TD3_20251206002429: epoch=29 step=29000 epoch=29 metrics={'time_sample_batch': 0.004881954431533814, 'time_algorithm_update': 0.011849599838256836, 'critic_loss': 96.85922986984252, 'actor_loss': -420.20343811035156, 'time_step': 0.01699270987510681, 'td_error': 453.8692808412184, 'value_scale': 448.3948741570702, 'discounted_advantage': -582.5515171309149, 'initial_state': 375.6479797363281, 'diff_eval': 113474.30846605197} step=29000
2025-12-06 00:34.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_29000.d3


Epoch 30/200: 100%|██████████| 1000/1000 [00:16<00:00, 58.95it/s, critic_loss=111, actor_loss=-455]


2025-12-06 00:34.50 [info     ] TD3_20251206002429: epoch=30 step=30000 epoch=30 metrics={'time_sample_batch': 0.00480199408531189, 'time_algorithm_update': 0.011578827619552612, 'critic_loss': 111.19001895523071, 'actor_loss': -455.12467541503906, 'time_step': 0.016639207124710082, 'td_error': 535.1991823825414, 'value_scale': 488.1702576578912, 'discounted_advantage': -625.3946510662031, 'initial_state': 408.23992919921875, 'diff_eval': 113474.30846605197} step=30000
2025-12-06 00:34.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_30000.d3


Epoch 31/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.61it/s, critic_loss=136, actor_loss=-494]


2025-12-06 00:35.10 [info     ] TD3_20251206002429: epoch=31 step=31000 epoch=31 metrics={'time_sample_batch': 0.004592020750045776, 'time_algorithm_update': 0.01136364507675171, 'critic_loss': 135.50998134231568, 'actor_loss': -494.35497680664065, 'time_step': 0.016205225944519043, 'td_error': 574.0610598635618, 'value_scale': 529.5039587028868, 'discounted_advantage': -670.2009393019662, 'initial_state': 442.8946533203125, 'diff_eval': 113474.30846605197} step=31000
2025-12-06 00:35.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_31000.d3


Epoch 32/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.65it/s, critic_loss=160, actor_loss=-536]


2025-12-06 00:35.30 [info     ] TD3_20251206002429: epoch=32 step=32000 epoch=32 metrics={'time_sample_batch': 0.004828484773635865, 'time_algorithm_update': 0.011960549354553223, 'critic_loss': 160.2113369102478, 'actor_loss': -536.4922534790039, 'time_step': 0.017046477556228637, 'td_error': 740.2579393338328, 'value_scale': 575.4471762338209, 'discounted_advantage': -737.7588660162148, 'initial_state': 480.308837890625, 'diff_eval': 113474.30846605197} step=32000
2025-12-06 00:35.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_32000.d3


Epoch 33/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.02it/s, critic_loss=198, actor_loss=-582]


2025-12-06 00:35.51 [info     ] TD3_20251206002429: epoch=33 step=33000 epoch=33 metrics={'time_sample_batch': 0.00484187912940979, 'time_algorithm_update': 0.012136215686798096, 'critic_loss': 197.94743258666992, 'actor_loss': -582.2495081787109, 'time_step': 0.017229769468307496, 'td_error': 903.3469069785464, 'value_scale': 626.1106396504839, 'discounted_advantage': -782.1641804466958, 'initial_state': 521.2297973632812, 'diff_eval': 113474.30846605197} step=33000
2025-12-06 00:35.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_33000.d3


Epoch 34/200: 100%|██████████| 1000/1000 [00:18<00:00, 53.57it/s, critic_loss=251, actor_loss=-632]


2025-12-06 00:36.13 [info     ] TD3_20251206002429: epoch=34 step=34000 epoch=34 metrics={'time_sample_batch': 0.005676899433135986, 'time_algorithm_update': 0.012358529329299926, 'critic_loss': 250.91379988098146, 'actor_loss': -632.3846330566406, 'time_step': 0.018306984424591066, 'td_error': 1211.658262829171, 'value_scale': 683.777212521856, 'discounted_advantage': -904.9354881267052, 'initial_state': 567.5458374023438, 'diff_eval': 113474.30846605197} step=34000
2025-12-06 00:36.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_34000.d3


Epoch 35/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.00it/s, critic_loss=314, actor_loss=-688]


2025-12-06 00:36.34 [info     ] TD3_20251206002429: epoch=35 step=35000 epoch=35 metrics={'time_sample_batch': 0.004874198913574219, 'time_algorithm_update': 0.011507933139801025, 'critic_loss': 315.5272917938232, 'actor_loss': -688.2924935302734, 'time_step': 0.01662899112701416, 'td_error': 1387.636529623088, 'value_scale': 744.3884962134669, 'discounted_advantage': -973.8640991867948, 'initial_state': 615.100830078125, 'diff_eval': 113474.30846605197} step=35000
2025-12-06 00:36.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_35000.d3


Epoch 36/200: 100%|██████████| 1000/1000 [00:16<00:00, 58.96it/s, critic_loss=376, actor_loss=-748]


2025-12-06 00:36.54 [info     ] TD3_20251206002429: epoch=36 step=36000 epoch=36 metrics={'time_sample_batch': 0.004605229616165161, 'time_algorithm_update': 0.011814373970031739, 'critic_loss': 376.18961318969724, 'actor_loss': -747.930017944336, 'time_step': 0.01666693305969238, 'td_error': 1777.2772097689726, 'value_scale': 811.5555242137158, 'discounted_advantage': -1081.922088839022, 'initial_state': 668.9212646484375, 'diff_eval': 113474.30846605197} step=36000
2025-12-06 00:36.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_36000.d3


Epoch 37/200: 100%|██████████| 1000/1000 [00:16<00:00, 58.97it/s, critic_loss=473, actor_loss=-812]


2025-12-06 00:37.14 [info     ] TD3_20251206002429: epoch=37 step=37000 epoch=37 metrics={'time_sample_batch': 0.004748204469680786, 'time_algorithm_update': 0.011600741147994996, 'critic_loss': 474.274842590332, 'actor_loss': -812.4261773681641, 'time_step': 0.016608969449996947, 'td_error': 2130.7588889296567, 'value_scale': 884.5984942427425, 'discounted_advantage': -1187.9881604954078, 'initial_state': 728.9638061523438, 'diff_eval': 113474.30846605197} step=37000
2025-12-06 00:37.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_37000.d3


Epoch 38/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.99it/s, critic_loss=575, actor_loss=-882]


2025-12-06 00:37.34 [info     ] TD3_20251206002429: epoch=38 step=38000 epoch=38 metrics={'time_sample_batch': 0.004673938035964966, 'time_algorithm_update': 0.011441812038421631, 'critic_loss': 576.8495928955078, 'actor_loss': -882.0910048828125, 'time_step': 0.01636661171913147, 'td_error': 2181.8311691396093, 'value_scale': 962.6974396749594, 'discounted_advantage': -1194.1638980128553, 'initial_state': 796.577880859375, 'diff_eval': 113474.30846605197} step=38000
2025-12-06 00:37.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_38000.d3


Epoch 39/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.48it/s, critic_loss=667, actor_loss=-955]


2025-12-06 00:37.55 [info     ] TD3_20251206002429: epoch=39 step=39000 epoch=39 metrics={'time_sample_batch': 0.004693623781204224, 'time_algorithm_update': 0.012134462118148804, 'critic_loss': 667.569385635376, 'actor_loss': -955.6967958984375, 'time_step': 0.01708344054222107, 'td_error': 2688.8758056110764, 'value_scale': 1048.1994881386129, 'discounted_advantage': -1341.959268836574, 'initial_state': 871.7352905273438, 'diff_eval': 113474.30846605197} step=39000
2025-12-06 00:37.55 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_39000.d3


Epoch 40/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.50it/s, critic_loss=814, actor_loss=-1.03e+3] 


2025-12-06 00:38.16 [info     ] TD3_20251206002429: epoch=40 step=40000 epoch=40 metrics={'time_sample_batch': 0.004629150390625, 'time_algorithm_update': 0.01221990466117859, 'critic_loss': 821.8721497955322, 'actor_loss': -1034.8169020996095, 'time_step': 0.017088940382003783, 'td_error': 2886.4182269281496, 'value_scale': 1134.3774123467624, 'discounted_advantage': -1448.4189043271915, 'initial_state': 948.1961669921875, 'diff_eval': 113474.30846605197} step=40000
2025-12-06 00:38.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_40000.d3


Epoch 41/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.34it/s, critic_loss=972, actor_loss=-1.12e+3]


2025-12-06 00:38.35 [info     ] TD3_20251206002429: epoch=41 step=41000 epoch=41 metrics={'time_sample_batch': 0.004538995981216431, 'time_algorithm_update': 0.011225128173828125, 'critic_loss': 972.4845797729492, 'actor_loss': -1117.6690954589844, 'time_step': 0.01600709867477417, 'td_error': 3518.0401927303687, 'value_scale': 1234.484814831717, 'discounted_advantage': -1556.3434357952474, 'initial_state': 1040.1156005859375, 'diff_eval': 113474.30846605197} step=41000
2025-12-06 00:38.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_41000.d3


Epoch 42/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.30it/s, critic_loss=1.14e+3, actor_loss=-1.21e+3]


2025-12-06 00:38.55 [info     ] TD3_20251206002429: epoch=42 step=42000 epoch=42 metrics={'time_sample_batch': 0.004724195003509521, 'time_algorithm_update': 0.011594075679779053, 'critic_loss': 1140.517876953125, 'actor_loss': -1209.2906196289061, 'time_step': 0.01655213189125061, 'td_error': 3914.0415282674676, 'value_scale': 1332.2513451780078, 'discounted_advantage': -1677.9053574252732, 'initial_state': 1128.16064453125, 'diff_eval': 113474.30846605197} step=42000
2025-12-06 00:38.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_42000.d3


Epoch 43/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.05it/s, critic_loss=1.41e+3, actor_loss=-1.31e+3]


2025-12-06 00:39.15 [info     ] TD3_20251206002429: epoch=43 step=43000 epoch=43 metrics={'time_sample_batch': 0.004685811996459961, 'time_algorithm_update': 0.011433244466781615, 'critic_loss': 1402.2802458496094, 'actor_loss': -1305.6746228027343, 'time_step': 0.016364021062850953, 'td_error': 4436.885543566986, 'value_scale': 1439.998669249414, 'discounted_advantage': -1825.8623768646307, 'initial_state': 1227.655029296875, 'diff_eval': 113474.30846605197} step=43000
2025-12-06 00:39.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_43000.d3


Epoch 44/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.06it/s, critic_loss=1.68e+3, actor_loss=-1.4e+3]


2025-12-06 00:39.36 [info     ] TD3_20251206002429: epoch=44 step=44000 epoch=44 metrics={'time_sample_batch': 0.004748433113098144, 'time_algorithm_update': 0.01164189600944519, 'critic_loss': 1676.7275509643555, 'actor_loss': -1405.3955959472655, 'time_step': 0.016624895572662352, 'td_error': 4918.595311401876, 'value_scale': 1549.0648873289992, 'discounted_advantage': -1881.3205046339092, 'initial_state': 1324.946533203125, 'diff_eval': 113474.30846605197} step=44000
2025-12-06 00:39.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_44000.d3


Epoch 45/200: 100%|██████████| 1000/1000 [00:16<00:00, 62.23it/s, critic_loss=1.71e+3, actor_loss=-1.51e+3]


2025-12-06 00:39.55 [info     ] TD3_20251206002429: epoch=45 step=45000 epoch=45 metrics={'time_sample_batch': 0.004434608697891236, 'time_algorithm_update': 0.011113343954086304, 'critic_loss': 1728.9183153076171, 'actor_loss': -1513.1666962890624, 'time_step': 0.015785085201263427, 'td_error': 6113.4390576688, 'value_scale': 1664.895479264623, 'discounted_advantage': -2142.7067083974985, 'initial_state': 1433.97119140625, 'diff_eval': 113474.30846605197} step=45000
2025-12-06 00:39.55 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_45000.d3


Epoch 46/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.83it/s, critic_loss=2.24e+3, actor_loss=-1.63e+3]


2025-12-06 00:40.16 [info     ] TD3_20251206002429: epoch=46 step=46000 epoch=46 metrics={'time_sample_batch': 0.004800654411315918, 'time_algorithm_update': 0.011893324375152588, 'critic_loss': 2236.6784041748047, 'actor_loss': -1627.1379809570312, 'time_step': 0.016955174207687378, 'td_error': 6514.117335707224, 'value_scale': 1790.817025708233, 'discounted_advantage': -2192.373437970866, 'initial_state': 1547.367919921875, 'diff_eval': 113474.30846605197} step=46000
2025-12-06 00:40.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_46000.d3


Epoch 47/200: 100%|██████████| 1000/1000 [00:17<00:00, 56.90it/s, critic_loss=2.47e+3, actor_loss=-1.75e+3]


2025-12-06 00:40.37 [info     ] TD3_20251206002429: epoch=47 step=47000 epoch=47 metrics={'time_sample_batch': 0.004675115346908569, 'time_algorithm_update': 0.012322853803634643, 'critic_loss': 2523.163432067871, 'actor_loss': -1747.5516123046875, 'time_step': 0.017238268852233885, 'td_error': 8123.84989880931, 'value_scale': 1924.411434867456, 'discounted_advantage': -2419.0656234311214, 'initial_state': 1664.6910400390625, 'diff_eval': 113474.30846605197} step=47000
2025-12-06 00:40.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_47000.d3


Epoch 48/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.03it/s, critic_loss=2.88e+3, actor_loss=-1.88e+3]


2025-12-06 00:40.57 [info     ] TD3_20251206002429: epoch=48 step=48000 epoch=48 metrics={'time_sample_batch': 0.00472986388206482, 'time_algorithm_update': 0.011643722057342529, 'critic_loss': 2916.699423095703, 'actor_loss': -1882.3393884277343, 'time_step': 0.01662806749343872, 'td_error': 9292.017467761365, 'value_scale': 2064.622671762036, 'discounted_advantage': -2626.971702208165, 'initial_state': 1789.8516845703125, 'diff_eval': 113474.30846605197} step=48000
2025-12-06 00:40.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_48000.d3


Epoch 49/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.74it/s, critic_loss=3.61e+3, actor_loss=-2.02e+3]


2025-12-06 00:41.17 [info     ] TD3_20251206002429: epoch=49 step=49000 epoch=49 metrics={'time_sample_batch': 0.00470979380607605, 'time_algorithm_update': 0.011471752643585204, 'critic_loss': 3646.349381591797, 'actor_loss': -2021.5103469238281, 'time_step': 0.016422028064727784, 'td_error': 10530.12567629866, 'value_scale': 2218.583652135013, 'discounted_advantage': -2817.7320153712803, 'initial_state': 1925.9749755859375, 'diff_eval': 113474.30846605197} step=49000
2025-12-06 00:41.17 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_49000.d3


Epoch 50/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.92it/s, critic_loss=4.54e+3, actor_loss=-2.18e+3]


2025-12-06 00:41.38 [info     ] TD3_20251206002429: epoch=50 step=50000 epoch=50 metrics={'time_sample_batch': 0.004776692390441895, 'time_algorithm_update': 0.011922878742218018, 'critic_loss': 4530.112627441406, 'actor_loss': -2178.9370102539065, 'time_step': 0.016943288326263427, 'td_error': 13302.914186959375, 'value_scale': 2386.5949124818276, 'discounted_advantage': -3113.675623274499, 'initial_state': 2077.718505859375, 'diff_eval': 113474.30846605197} step=50000
2025-12-06 00:41.38 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_50000.d3


Epoch 51/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.31it/s, critic_loss=4.93e+3, actor_loss=-2.35e+3]


2025-12-06 00:41.58 [info     ] TD3_20251206002429: epoch=51 step=51000 epoch=51 metrics={'time_sample_batch': 0.004603188276290893, 'time_algorithm_update': 0.0114405038356781, 'critic_loss': 4965.015500610351, 'actor_loss': -2347.5753525390624, 'time_step': 0.016276440620422362, 'td_error': 13980.191731741406, 'value_scale': 2565.866911946478, 'discounted_advantage': -3152.4582764446222, 'initial_state': 2236.906005859375, 'diff_eval': 113474.30846605197} step=51000
2025-12-06 00:41.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_51000.d3


Epoch 52/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.64it/s, critic_loss=5.61e+3, actor_loss=-2.53e+3]


2025-12-06 00:42.18 [info     ] TD3_20251206002429: epoch=52 step=52000 epoch=52 metrics={'time_sample_batch': 0.0048226420879364015, 'time_algorithm_update': 0.011647672176361083, 'critic_loss': 5670.44706237793, 'actor_loss': -2530.410712890625, 'time_step': 0.016723707914352418, 'td_error': 17267.194860067848, 'value_scale': 2761.390203432785, 'discounted_advantage': -3529.9785256265895, 'initial_state': 2414.14599609375, 'diff_eval': 113474.30846605197} step=52000
2025-12-06 00:42.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_52000.d3


Epoch 53/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.65it/s, critic_loss=7.14e+3, actor_loss=-2.73e+3]


2025-12-06 00:42.38 [info     ] TD3_20251206002429: epoch=53 step=53000 epoch=53 metrics={'time_sample_batch': 0.004577350616455078, 'time_algorithm_update': 0.01131329083442688, 'critic_loss': 7198.487469116211, 'actor_loss': -2732.122642578125, 'time_step': 0.016156457185745238, 'td_error': 18354.626889026575, 'value_scale': 2970.0583233125853, 'discounted_advantage': -3679.828063949791, 'initial_state': 2602.30908203125, 'diff_eval': 113474.30846605197} step=53000
2025-12-06 00:42.38 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_53000.d3


Epoch 54/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.63it/s, critic_loss=7.67e+3, actor_loss=-2.94e+3]


2025-12-06 00:42.58 [info     ] TD3_20251206002429: epoch=54 step=54000 epoch=54 metrics={'time_sample_batch': 0.004544095277786255, 'time_algorithm_update': 0.011404949188232422, 'critic_loss': 7656.012489013672, 'actor_loss': -2941.8377919921877, 'time_step': 0.0161980984210968, 'td_error': 22792.89854424241, 'value_scale': 3196.0290528366972, 'discounted_advantage': -4037.0413424597486, 'initial_state': 2810.657470703125, 'diff_eval': 113474.30846605197} step=54000
2025-12-06 00:42.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_54000.d3


Epoch 55/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.29it/s, critic_loss=9.31e+3, actor_loss=-3.16e+3]


2025-12-06 00:43.18 [info     ] TD3_20251206002429: epoch=55 step=55000 epoch=55 metrics={'time_sample_batch': 0.004610633134841919, 'time_algorithm_update': 0.011446961641311645, 'critic_loss': 9283.868631835938, 'actor_loss': -3165.1418735351563, 'time_step': 0.01629888653755188, 'td_error': 25468.428293156205, 'value_scale': 3435.634480350875, 'discounted_advantage': -4239.691630472182, 'initial_state': 3034.66259765625, 'diff_eval': 113474.30846605197} step=55000
2025-12-06 00:43.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_55000.d3


Epoch 56/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.99it/s, critic_loss=1.16e+4, actor_loss=-3.41e+3]


2025-12-06 00:43.38 [info     ] TD3_20251206002429: epoch=56 step=56000 epoch=56 metrics={'time_sample_batch': 0.00487839961051941, 'time_algorithm_update': 0.011794704675674438, 'critic_loss': 11591.389799316406, 'actor_loss': -3412.4159794921875, 'time_step': 0.016919416904449463, 'td_error': 28851.606955552616, 'value_scale': 3691.1171043121003, 'discounted_advantage': -4616.78926261636, 'initial_state': 3269.6875, 'diff_eval': 113474.30846605197} step=56000
2025-12-06 00:43.38 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_56000.d3


Epoch 57/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.02it/s, critic_loss=1.41e+4, actor_loss=-3.69e+3]


2025-12-06 00:43.59 [info     ] TD3_20251206002429: epoch=57 step=57000 epoch=57 metrics={'time_sample_batch': 0.00478258204460144, 'time_algorithm_update': 0.011615485191345214, 'critic_loss': 14081.002708984375, 'actor_loss': -3687.8658994140624, 'time_step': 0.016648046016693115, 'td_error': 39196.95666163864, 'value_scale': 3997.1541764418416, 'discounted_advantage': -5157.7840862476605, 'initial_state': 3557.8486328125, 'diff_eval': 113474.30846605197} step=57000
2025-12-06 00:43.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_57000.d3


Epoch 58/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.33it/s, critic_loss=1.63e+4, actor_loss=-3.99e+3]


2025-12-06 00:44.19 [info     ] TD3_20251206002429: epoch=58 step=58000 epoch=58 metrics={'time_sample_batch': 0.004682135581970215, 'time_algorithm_update': 0.011359453439712524, 'critic_loss': 16287.895144042968, 'actor_loss': -3993.0017197265624, 'time_step': 0.016290050506591797, 'td_error': 41991.411560192355, 'value_scale': 4299.5701242272635, 'discounted_advantage': -5410.662692933349, 'initial_state': 3827.35009765625, 'diff_eval': 113474.30846605197} step=58000
2025-12-06 00:44.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_58000.d3


Epoch 59/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.40it/s, critic_loss=1.91e+4, actor_loss=-4.31e+3]


2025-12-06 00:44.38 [info     ] TD3_20251206002429: epoch=59 step=59000 epoch=59 metrics={'time_sample_batch': 0.004672073602676392, 'time_algorithm_update': 0.011314268350601196, 'critic_loss': 19089.323091308594, 'actor_loss': -4307.802303710938, 'time_step': 0.01624043869972229, 'td_error': 52940.60146843375, 'value_scale': 4644.102232403047, 'discounted_advantage': -5962.64171357807, 'initial_state': 4128.65869140625, 'diff_eval': 113474.30846605197} step=59000
2025-12-06 00:44.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_59000.d3


Epoch 60/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.74it/s, critic_loss=2.39e+4, actor_loss=-4.66e+3]


2025-12-06 00:44.59 [info     ] TD3_20251206002429: epoch=60 step=60000 epoch=60 metrics={'time_sample_batch': 0.004774138689041137, 'time_algorithm_update': 0.01169405460357666, 'critic_loss': 23912.171832519532, 'actor_loss': -4663.343365234375, 'time_step': 0.016718100547790528, 'td_error': 71985.26346848306, 'value_scale': 5039.0414341225505, 'discounted_advantage': -6685.879733658014, 'initial_state': 4481.671875, 'diff_eval': 113474.30846605197} step=60000
2025-12-06 00:44.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_60000.d3


Epoch 61/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.23it/s, critic_loss=2.84e+4, actor_loss=-5.06e+3]


2025-12-06 00:45.19 [info     ] TD3_20251206002429: epoch=61 step=61000 epoch=61 metrics={'time_sample_batch': 0.0046375575065612795, 'time_algorithm_update': 0.011431625127792359, 'critic_loss': 28339.408390625, 'actor_loss': -5064.4941181640625, 'time_step': 0.0163026225566864, 'td_error': 88508.53471065078, 'value_scale': 5474.678482554485, 'discounted_advantage': -7201.9255594183105, 'initial_state': 4870.20703125, 'diff_eval': 113474.30846605197} step=61000
2025-12-06 00:45.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_61000.d3


Epoch 62/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.11it/s, critic_loss=3.41e+4, actor_loss=-5.5e+3]


2025-12-06 00:45.39 [info     ] TD3_20251206002429: epoch=62 step=62000 epoch=62 metrics={'time_sample_batch': 0.004686646699905396, 'time_algorithm_update': 0.011400497198104858, 'critic_loss': 34026.285140625, 'actor_loss': -5503.72649609375, 'time_step': 0.016340173244476317, 'td_error': 104030.58963341724, 'value_scale': 5968.072906366238, 'discounted_advantage': -7847.616167559773, 'initial_state': 5321.2861328125, 'diff_eval': 113474.30846605197} step=62000
2025-12-06 00:45.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_62000.d3


Epoch 63/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.68it/s, critic_loss=4.44e+4, actor_loss=-6e+3]  


2025-12-06 00:45.59 [info     ] TD3_20251206002429: epoch=63 step=63000 epoch=63 metrics={'time_sample_batch': 0.004708495378494263, 'time_algorithm_update': 0.011510224819183349, 'critic_loss': 44527.17140917969, 'actor_loss': -6006.924236328125, 'time_step': 0.016463355541229248, 'td_error': 143373.54262723128, 'value_scale': 6529.311084066233, 'discounted_advantage': -8801.757042592111, 'initial_state': 5812.32763671875, 'diff_eval': 113474.30846605197} step=63000
2025-12-06 00:45.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_63000.d3


Epoch 64/200: 100%|██████████| 1000/1000 [00:16<00:00, 58.90it/s, critic_loss=5.67e+4, actor_loss=-6.59e+3]


2025-12-06 00:46.19 [info     ] TD3_20251206002429: epoch=64 step=64000 epoch=64 metrics={'time_sample_batch': 0.0048507599830627446, 'time_algorithm_update': 0.011566811323165893, 'critic_loss': 56604.22639648437, 'actor_loss': -6596.959415039062, 'time_step': 0.016666416645050048, 'td_error': 176084.657364157, 'value_scale': 7178.961856557457, 'discounted_advantage': -9489.37768097203, 'initial_state': 6402.00732421875, 'diff_eval': 113474.30846605197} step=64000
2025-12-06 00:46.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_64000.d3


Epoch 65/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.24it/s, critic_loss=7.71e+4, actor_loss=-7.24e+3]


2025-12-06 00:46.40 [info     ] TD3_20251206002429: epoch=65 step=65000 epoch=65 metrics={'time_sample_batch': 0.004564779996871948, 'time_algorithm_update': 0.011494623184204102, 'critic_loss': 77082.13281640624, 'actor_loss': -7243.090338867188, 'time_step': 0.016303876161575317, 'td_error': 225598.36523504683, 'value_scale': 7896.508111485292, 'discounted_advantage': -10502.370906467506, 'initial_state': 7057.29443359375, 'diff_eval': 113474.30846605197} step=65000
2025-12-06 00:46.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_65000.d3


Epoch 66/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.96it/s, critic_loss=1.02e+5, actor_loss=-7.98e+3]


2025-12-06 00:47.00 [info     ] TD3_20251206002429: epoch=66 step=66000 epoch=66 metrics={'time_sample_batch': 0.004706076860427857, 'time_algorithm_update': 0.011395334005355836, 'critic_loss': 102453.12792578126, 'actor_loss': -7980.043865234375, 'time_step': 0.01636790132522583, 'td_error': 326453.225661273, 'value_scale': 8771.28000555814, 'discounted_advantage': -11940.751989398961, 'initial_state': 7854.1611328125, 'diff_eval': 113474.30846605197} step=66000
2025-12-06 00:47.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_66000.d3


Epoch 67/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.28it/s, critic_loss=1.36e+5, actor_loss=-8.88e+3]


2025-12-06 00:47.20 [info     ] TD3_20251206002429: epoch=67 step=67000 epoch=67 metrics={'time_sample_batch': 0.004631257534027099, 'time_algorithm_update': 0.011391287803649902, 'critic_loss': 135792.984734375, 'actor_loss': -8881.3700703125, 'time_step': 0.016275110721588135, 'td_error': 476030.43361860147, 'value_scale': 9792.359618526692, 'discounted_advantage': -13786.73080101073, 'initial_state': 8762.6708984375, 'diff_eval': 113474.30846605197} step=67000
2025-12-06 00:47.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_67000.d3


Epoch 68/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.78it/s, critic_loss=2e+5, actor_loss=-9.97e+3]  


2025-12-06 00:47.40 [info     ] TD3_20251206002429: epoch=68 step=68000 epoch=68 metrics={'time_sample_batch': 0.004805382013320923, 'time_algorithm_update': 0.011655838251113892, 'critic_loss': 200626.4309296875, 'actor_loss': -9979.89533203125, 'time_step': 0.016702069282531738, 'td_error': 691831.7962414981, 'value_scale': 11025.506596094405, 'discounted_advantage': -15937.012662584166, 'initial_state': 9864.5439453125, 'diff_eval': 113474.30846605197} step=68000
2025-12-06 00:47.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_68000.d3


Epoch 69/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.42it/s, critic_loss=3.15e+5, actor_loss=-1.13e+4]


2025-12-06 00:48.00 [info     ] TD3_20251206002429: epoch=69 step=69000 epoch=69 metrics={'time_sample_batch': 0.00461374044418335, 'time_algorithm_update': 0.011408420324325562, 'critic_loss': 316176.57884375, 'actor_loss': -11347.776951171874, 'time_step': 0.01625827097892761, 'td_error': 1086545.2186293623, 'value_scale': 12629.49632377017, 'discounted_advantage': -18677.927722555833, 'initial_state': 11326.9951171875, 'diff_eval': 113474.30846605197} step=69000
2025-12-06 00:48.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_69000.d3


Epoch 70/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.67it/s, critic_loss=5.39e+5, actor_loss=-1.32e+4]


2025-12-06 00:48.21 [info     ] TD3_20251206002429: epoch=70 step=70000 epoch=70 metrics={'time_sample_batch': 0.004829220294952393, 'time_algorithm_update': 0.01168079686164856, 'critic_loss': 541369.1935, 'actor_loss': -13163.064232421875, 'time_step': 0.01674803113937378, 'td_error': 1815059.4374664384, 'value_scale': 14734.034681065983, 'discounted_advantage': -22262.506730460944, 'initial_state': 13216.2177734375, 'diff_eval': 113474.30846605197} step=70000
2025-12-06 00:48.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_70000.d3


Epoch 71/200: 100%|██████████| 1000/1000 [00:16<00:00, 58.92it/s, critic_loss=9.69e+5, actor_loss=-1.56e+4]


2025-12-06 00:48.41 [info     ] TD3_20251206002429: epoch=71 step=71000 epoch=71 metrics={'time_sample_batch': 0.00473087477684021, 'time_algorithm_update': 0.011708521127700806, 'critic_loss': 971685.655375, 'actor_loss': -15566.8528984375, 'time_step': 0.016666350841522216, 'td_error': 3294714.8898742623, 'value_scale': 17444.653951844746, 'discounted_advantage': -27992.37867017684, 'initial_state': 15658.70703125, 'diff_eval': 113474.30846605197} step=71000
2025-12-06 00:48.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_71000.d3


Epoch 72/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.81it/s, critic_loss=1.93e+6, actor_loss=-1.88e+4]


2025-12-06 00:49.00 [info     ] TD3_20251206002429: epoch=72 step=72000 epoch=72 metrics={'time_sample_batch': 0.004566612720489502, 'time_algorithm_update': 0.0110983726978302, 'critic_loss': 1936609.573375, 'actor_loss': -18768.4700703125, 'time_step': 0.015890913724899293, 'td_error': 5869942.975704631, 'value_scale': 21159.77652026142, 'discounted_advantage': -33825.96315174283, 'initial_state': 19061.57421875, 'diff_eval': 113474.30846605197} step=72000
2025-12-06 00:49.01 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_72000.d3


Epoch 73/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.55it/s, critic_loss=4.04e+6, actor_loss=-2.31e+4]


2025-12-06 00:49.21 [info     ] TD3_20251206002429: epoch=73 step=73000 epoch=73 metrics={'time_sample_batch': 0.005003896236419678, 'time_algorithm_update': 0.01151660943031311, 'critic_loss': 4055552.99875, 'actor_loss': -23116.40959765625, 'time_step': 0.016762248039245607, 'td_error': 11049167.479646899, 'value_scale': 26027.111734957827, 'discounted_advantage': -43229.19169611549, 'initial_state': 23564.064453125, 'diff_eval': 113474.30846605197} step=73000
2025-12-06 00:49.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_73000.d3


Epoch 74/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.08it/s, critic_loss=8.66e+6, actor_loss=-2.89e+4]


2025-12-06 00:49.42 [info     ] TD3_20251206002429: epoch=74 step=74000 epoch=74 metrics={'time_sample_batch': 0.004844218254089355, 'time_algorithm_update': 0.011818480014801026, 'critic_loss': 8687026.8735, 'actor_loss': -28951.1342265625, 'time_step': 0.01690618896484375, 'td_error': 21617277.201252095, 'value_scale': 32527.890470288927, 'discounted_advantage': -57511.59142402951, 'initial_state': 29604.04296875, 'diff_eval': 113474.30846605197} step=74000
2025-12-06 00:49.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_74000.d3


Epoch 75/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.16it/s, critic_loss=1.89e+7, actor_loss=-3.7e+4]


2025-12-06 00:50.02 [info     ] TD3_20251206002429: epoch=75 step=75000 epoch=75 metrics={'time_sample_batch': 0.004778956174850464, 'time_algorithm_update': 0.01156308126449585, 'critic_loss': 18975908.1275, 'actor_loss': -37075.78196875, 'time_step': 0.016591331720352175, 'td_error': 43396918.19619656, 'value_scale': 42135.241882989314, 'discounted_advantage': -75086.27516447345, 'initial_state': 38903.70703125, 'diff_eval': 113474.30846605197} step=75000
2025-12-06 00:50.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_75000.d3


Epoch 76/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.57it/s, critic_loss=4.28e+7, actor_loss=-4.83e+4]


2025-12-06 00:50.22 [info     ] TD3_20251206002429: epoch=76 step=76000 epoch=76 metrics={'time_sample_batch': 0.004611421823501587, 'time_algorithm_update': 0.011359386682510375, 'critic_loss': 43052909.864, 'actor_loss': -48384.884328125, 'time_step': 0.016208699226379394, 'td_error': 80436673.11619866, 'value_scale': 54775.668191402976, 'discounted_advantage': -98321.6506738753, 'initial_state': 51331.86328125, 'diff_eval': 113474.30846605197} step=76000
2025-12-06 00:50.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_76000.d3


Epoch 77/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.99it/s, critic_loss=9.18e+7, actor_loss=-6.28e+4]


2025-12-06 00:50.41 [info     ] TD3_20251206002429: epoch=77 step=77000 epoch=77 metrics={'time_sample_batch': 0.004579325437545776, 'time_algorithm_update': 0.011284260034561157, 'critic_loss': 92147230.992, 'actor_loss': -62825.5306328125, 'time_step': 0.01610562992095947, 'td_error': 143677737.72464377, 'value_scale': 70308.43993117404, 'discounted_advantage': -128442.84725261282, 'initial_state': 66490.5390625, 'diff_eval': 113474.30846605197} step=77000
2025-12-06 00:50.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_77000.d3


Epoch 78/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.30it/s, critic_loss=1.79e+8, actor_loss=-8.05e+4]


2025-12-06 00:51.02 [info     ] TD3_20251206002429: epoch=78 step=78000 epoch=78 metrics={'time_sample_batch': 0.004804887533187866, 'time_algorithm_update': 0.011482190608978272, 'critic_loss': 179552557.792, 'actor_loss': -80543.237265625, 'time_step': 0.01653762364387512, 'td_error': 230958394.67780805, 'value_scale': 89419.41793600691, 'discounted_advantage': -154342.66491494986, 'initial_state': 85474.671875, 'diff_eval': 113474.30846605197} step=78000
2025-12-06 00:51.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_78000.d3


Epoch 79/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.19it/s, critic_loss=3.27e+8, actor_loss=-1.02e+5]


2025-12-06 00:51.22 [info     ] TD3_20251206002429: epoch=79 step=79000 epoch=79 metrics={'time_sample_batch': 0.004807450294494629, 'time_algorithm_update': 0.011539416790008545, 'critic_loss': 327749486.448, 'actor_loss': -101971.52490625, 'time_step': 0.016596893310546874, 'td_error': 382792146.90842414, 'value_scale': 111823.71499109388, 'discounted_advantage': -199030.43423884932, 'initial_state': 108091.296875, 'diff_eval': 113474.30846605197} step=79000
2025-12-06 00:51.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_79000.d3


Epoch 80/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.72it/s, critic_loss=5.8e+8, actor_loss=-1.28e+5]


2025-12-06 00:51.43 [info     ] TD3_20251206002429: epoch=80 step=80000 epoch=80 metrics={'time_sample_batch': 0.0046546895503997805, 'time_algorithm_update': 0.011811589956283569, 'critic_loss': 581226091.872, 'actor_loss': -128065.97240625, 'time_step': 0.01671831440925598, 'td_error': 622269173.432104, 'value_scale': 140092.16617770327, 'discounted_advantage': -246023.10607606132, 'initial_state': 136809.921875, 'diff_eval': 113474.30846605197} step=80000
2025-12-06 00:51.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_80000.d3


Epoch 81/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.32it/s, critic_loss=1e+9, actor_loss=-1.6e+5]   


2025-12-06 00:52.03 [info     ] TD3_20251206002429: epoch=81 step=81000 epoch=81 metrics={'time_sample_batch': 0.004747435092926026, 'time_algorithm_update': 0.011812781572341918, 'critic_loss': 1005418209.344, 'actor_loss': -160607.5845625, 'time_step': 0.016811033964157104, 'td_error': 1019254561.6002724, 'value_scale': 175015.342125943, 'discounted_advantage': -307638.1604138631, 'initial_state': 172977.0, 'diff_eval': 113474.30846605197} step=81000
2025-12-06 00:52.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_81000.d3


Epoch 82/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.83it/s, critic_loss=1.69e+9, actor_loss=-2e+5]  


2025-12-06 00:52.23 [info     ] TD3_20251206002429: epoch=82 step=82000 epoch=82 metrics={'time_sample_batch': 0.0044953105449676515, 'time_algorithm_update': 0.01114162278175354, 'critic_loss': 1693580646.336, 'actor_loss': -200375.24253125, 'time_step': 0.015872095584869385, 'td_error': 1567871722.9054904, 'value_scale': 217622.30995258802, 'discounted_advantage': -363511.4706759627, 'initial_state': 217267.65625, 'diff_eval': 113474.30846605197} step=82000
2025-12-06 00:52.23 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_82000.d3


Epoch 83/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.32it/s, critic_loss=2.82e+9, actor_loss=-2.48e+5]


2025-12-06 00:52.43 [info     ] TD3_20251206002429: epoch=83 step=83000 epoch=83 metrics={'time_sample_batch': 0.004634152889251709, 'time_algorithm_update': 0.011383368492126464, 'critic_loss': 2825860533.12, 'actor_loss': -248574.31928125, 'time_step': 0.01626941466331482, 'td_error': 2458464795.558204, 'value_scale': 268083.06583979464, 'discounted_advantage': -447814.2755209515, 'initial_state': 270329.03125, 'diff_eval': 113474.30846605197} step=83000
2025-12-06 00:52.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_83000.d3


Epoch 84/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.23it/s, critic_loss=4.56e+9, actor_loss=-3.06e+5]


2025-12-06 00:53.02 [info     ] TD3_20251206002429: epoch=84 step=84000 epoch=84 metrics={'time_sample_batch': 0.004560513257980347, 'time_algorithm_update': 0.011195149660110473, 'critic_loss': 4569457087.232, 'actor_loss': -305934.521, 'time_step': 0.016019258260726928, 'td_error': 3791983772.17511, 'value_scale': 327683.96953583404, 'discounted_advantage': -550396.4463516957, 'initial_state': 333729.375, 'diff_eval': 113474.30846605197} step=84000
2025-12-06 00:53.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_84000.d3


Epoch 85/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.25it/s, critic_loss=7.16e+9, actor_loss=-3.73e+5]


2025-12-06 00:53.22 [info     ] TD3_20251206002429: epoch=85 step=85000 epoch=85 metrics={'time_sample_batch': 0.00464672327041626, 'time_algorithm_update': 0.01142213773727417, 'critic_loss': 7179698780.16, 'actor_loss': -373342.6815625, 'time_step': 0.01631132245063782, 'td_error': 5767434512.834791, 'value_scale': 396652.20523365465, 'discounted_advantage': -680369.9731482104, 'initial_state': 407881.875, 'diff_eval': 113474.30846605197} step=85000
2025-12-06 00:53.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_85000.d3


Epoch 86/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.88it/s, critic_loss=1.1e+10, actor_loss=-4.51e+5]


2025-12-06 00:53.42 [info     ] TD3_20251206002429: epoch=86 step=86000 epoch=86 metrics={'time_sample_batch': 0.004693805694580078, 'time_algorithm_update': 0.011460948944091796, 'critic_loss': 10998497385.472, 'actor_loss': -451833.3418125, 'time_step': 0.016395132064819337, 'td_error': 8323499327.877956, 'value_scale': 476819.4871123219, 'discounted_advantage': -788582.7649973897, 'initial_state': 493711.71875, 'diff_eval': 113474.30846605197} step=86000
2025-12-06 00:53.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_86000.d3


Epoch 87/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.79it/s, critic_loss=1.64e+10, actor_loss=-5.42e+5]


2025-12-06 00:54.02 [info     ] TD3_20251206002429: epoch=87 step=87000 epoch=87 metrics={'time_sample_batch': 0.004483431339263916, 'time_algorithm_update': 0.011149513959884644, 'critic_loss': 16428265368.576, 'actor_loss': -542422.0599375, 'time_step': 0.015880101919174196, 'td_error': 11944753242.035833, 'value_scale': 567337.4275199078, 'discounted_advantage': -934674.9114565258, 'initial_state': 591917.1875, 'diff_eval': 113474.30846605197} step=87000
2025-12-06 00:54.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_87000.d3


Epoch 88/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.47it/s, critic_loss=2.43e+10, actor_loss=-6.46e+5]


2025-12-06 00:54.22 [info     ] TD3_20251206002429: epoch=88 step=88000 epoch=88 metrics={'time_sample_batch': 0.004712271451950073, 'time_algorithm_update': 0.011514567136764527, 'critic_loss': 24286308766.72, 'actor_loss': -646673.91025, 'time_step': 0.01648688769340515, 'td_error': 16991043128.998114, 'value_scale': 672834.1603625314, 'discounted_advantage': -1098696.0535469197, 'initial_state': 707705.125, 'diff_eval': 113474.30846605197} step=88000
2025-12-06 00:54.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_88000.d3


Epoch 89/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.81it/s, critic_loss=3.5e+10, actor_loss=-7.64e+5]


2025-12-06 00:54.42 [info     ] TD3_20251206002429: epoch=89 step=89000 epoch=89 metrics={'time_sample_batch': 0.004615901947021484, 'time_algorithm_update': 0.011274525165557861, 'critic_loss': 35010612002.816, 'actor_loss': -764472.71125, 'time_step': 0.01614112138748169, 'td_error': 23245381734.38139, 'value_scale': 791941.3948030176, 'discounted_advantage': -1237631.1909047517, 'initial_state': 839928.375, 'diff_eval': 113474.30846605197} step=89000
2025-12-06 00:54.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_89000.d3


Epoch 90/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.57it/s, critic_loss=4.95e+10, actor_loss=-8.97e+5]


2025-12-06 00:55.02 [info     ] TD3_20251206002429: epoch=90 step=90000 epoch=90 metrics={'time_sample_batch': 0.004689717054367065, 'time_algorithm_update': 0.011555881977081298, 'critic_loss': 49648302983.168, 'actor_loss': -897368.646375, 'time_step': 0.016495962381362916, 'td_error': 32552049781.650093, 'value_scale': 923947.3566376781, 'discounted_advantage': -1491767.7458038973, 'initial_state': 986930.8125, 'diff_eval': 113474.30846605197} step=90000
2025-12-06 00:55.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_90000.d3


Epoch 91/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.81it/s, critic_loss=6.92e+10, actor_loss=-1.05e+6]


2025-12-06 00:55.22 [info     ] TD3_20251206002429: epoch=91 step=91000 epoch=91 metrics={'time_sample_batch': 0.004521519422531128, 'time_algorithm_update': 0.011107922077178955, 'critic_loss': 69316548456.448, 'actor_loss': -1049031.714875, 'time_step': 0.015879738330841064, 'td_error': 43416288013.11237, 'value_scale': 1075737.232868818, 'discounted_advantage': -1639300.9203784724, 'initial_state': 1155691.625, 'diff_eval': 113474.30846605197} step=91000
2025-12-06 00:55.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_91000.d3


Epoch 92/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.37it/s, critic_loss=9.54e+10, actor_loss=-1.22e+6]


2025-12-06 00:55.42 [info     ] TD3_20251206002429: epoch=92 step=92000 epoch=92 metrics={'time_sample_batch': 0.004694229841232299, 'time_algorithm_update': 0.011560840129852295, 'critic_loss': 95565983531.008, 'actor_loss': -1217757.7275, 'time_step': 0.01652398467063904, 'td_error': 58692081418.12489, 'value_scale': 1240575.0526246857, 'discounted_advantage': -1933672.8815668395, 'initial_state': 1342179.25, 'diff_eval': 113474.30846605197} step=92000
2025-12-06 00:55.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_92000.d3


Epoch 93/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.94it/s, critic_loss=1.3e+11, actor_loss=-1.4e+6] 


2025-12-06 00:56.02 [info     ] TD3_20251206002429: epoch=93 step=93000 epoch=93 metrics={'time_sample_batch': 0.004688226461410523, 'time_algorithm_update': 0.011436511516571045, 'critic_loss': 130240287137.792, 'actor_loss': -1404476.10425, 'time_step': 0.016372023820877074, 'td_error': 77875576735.88951, 'value_scale': 1425677.4656852472, 'discounted_advantage': -2196449.025186943, 'initial_state': 1549479.375, 'diff_eval': 113474.30846605197} step=93000
2025-12-06 00:56.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_93000.d3


Epoch 94/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.02it/s, critic_loss=1.74e+11, actor_loss=-1.61e+6]


2025-12-06 00:56.23 [info     ] TD3_20251206002429: epoch=94 step=94000 epoch=94 metrics={'time_sample_batch': 0.0046954965591430664, 'time_algorithm_update': 0.011651065826416016, 'critic_loss': 173981020979.2, 'actor_loss': -1611786.37675, 'time_step': 0.016603399515151978, 'td_error': 102369994676.69876, 'value_scale': 1626462.264668902, 'discounted_advantage': -2517433.5656274394, 'initial_state': 1776907.5, 'diff_eval': 113474.30846605197} step=94000
2025-12-06 00:56.23 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_94000.d3


Epoch 95/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.79it/s, critic_loss=2.31e+11, actor_loss=-1.84e+6]


2025-12-06 00:56.43 [info     ] TD3_20251206002429: epoch=95 step=95000 epoch=95 metrics={'time_sample_batch': 0.004870002269744873, 'time_algorithm_update': 0.011835745573043822, 'critic_loss': 230966162604.032, 'actor_loss': -1838739.65075, 'time_step': 0.016960124492645265, 'td_error': 134269468668.46187, 'value_scale': 1851265.9103625314, 'discounted_advantage': -2889273.251200906, 'initial_state': 2032298.875, 'diff_eval': 113474.30846605197} step=95000
2025-12-06 00:56.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_95000.d3


Epoch 96/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.67it/s, critic_loss=3.04e+11, actor_loss=-2.09e+6]


2025-12-06 00:57.03 [info     ] TD3_20251206002429: epoch=96 step=96000 epoch=96 metrics={'time_sample_batch': 0.004640054702758789, 'time_algorithm_update': 0.011512009620666504, 'critic_loss': 304571926773.76, 'actor_loss': -2089128.7575, 'time_step': 0.016403068780899047, 'td_error': 171729717007.02054, 'value_scale': 2098762.581831517, 'discounted_advantage': -3195477.239029954, 'initial_state': 2312571.25, 'diff_eval': 113474.30846605197} step=96000
2025-12-06 00:57.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_96000.d3


Epoch 97/200: 100%|██████████| 1000/1000 [00:18<00:00, 54.93it/s, critic_loss=3.95e+11, actor_loss=-2.37e+6]


2025-12-06 00:57.25 [info     ] TD3_20251206002429: epoch=97 step=97000 epoch=97 metrics={'time_sample_batch': 0.005127406120300293, 'time_algorithm_update': 0.012515363931655884, 'critic_loss': 394799540617.216, 'actor_loss': -2366506.27, 'time_step': 0.017889492511749268, 'td_error': 219364991611.68256, 'value_scale': 2374974.22983026, 'discounted_advantage': -3552775.253821916, 'initial_state': 2630176.0, 'diff_eval': 113474.30846605197} step=97000
2025-12-06 00:57.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_97000.d3


Epoch 98/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.44it/s, critic_loss=5.1e+11, actor_loss=-2.67e+6]


2025-12-06 00:57.45 [info     ] TD3_20251206002429: epoch=98 step=98000 epoch=98 metrics={'time_sample_batch': 0.004747625112533569, 'time_algorithm_update': 0.011506258010864258, 'critic_loss': 510146020212.736, 'actor_loss': -2673492.7355, 'time_step': 0.01649659490585327, 'td_error': 279414695659.5065, 'value_scale': 2669984.4899413246, 'discounted_advantage': -4001780.9400085737, 'initial_state': 2968515.0, 'diff_eval': 113474.30846605197} step=98000
2025-12-06 00:57.45 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_98000.d3


Epoch 99/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.46it/s, critic_loss=6.58e+11, actor_loss=-3.01e+6]


2025-12-06 00:58.05 [info     ] TD3_20251206002429: epoch=99 step=99000 epoch=99 metrics={'time_sample_batch': 0.004493329048156738, 'time_algorithm_update': 0.011810575008392334, 'critic_loss': 658494572167.168, 'actor_loss': -3007771.764, 'time_step': 0.01654132533073425, 'td_error': 353219924458.89734, 'value_scale': 2996311.6903813914, 'discounted_advantage': -4483313.392697715, 'initial_state': 3344854.5, 'diff_eval': 113474.30846605197} step=99000
2025-12-06 00:58.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_99000.d3


Epoch 100/200: 100%|██████████| 1000/1000 [00:17<00:00, 56.57it/s, critic_loss=8.27e+11, actor_loss=-3.37e+6]


2025-12-06 00:58.26 [info     ] TD3_20251206002429: epoch=100 step=100000 epoch=100 metrics={'time_sample_batch': 0.004978521347045899, 'time_algorithm_update': 0.012092315435409546, 'critic_loss': 827283031392.256, 'actor_loss': -3375113.1255, 'time_step': 0.01733669662475586, 'td_error': 443534624888.3789, 'value_scale': 3347413.5388725903, 'discounted_advantage': -5020047.4334689565, 'initial_state': 3748755.5, 'diff_eval': 113474.30846605197} step=100000
2025-12-06 00:58.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_100000.d3


Epoch 101/200: 100%|██████████| 1000/1000 [00:18<00:00, 54.65it/s, critic_loss=1.05e+12, actor_loss=-3.77e+6]


2025-12-06 00:58.48 [info     ] TD3_20251206002429: epoch=101 step=101000 epoch=101 metrics={'time_sample_batch': 0.0047506229877471925, 'time_algorithm_update': 0.012953511238098145, 'critic_loss': 1054587045937.152, 'actor_loss': -3771138.572, 'time_step': 0.017963271141052246, 'td_error': 553955843683.9065, 'value_scale': 3733256.158214585, 'discounted_advantage': -5585458.271097389, 'initial_state': 4200145.5, 'diff_eval': 113474.30846605197} step=101000
2025-12-06 00:58.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_101000.d3


Epoch 102/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.05it/s, critic_loss=1.31e+12, actor_loss=-4.2e+6]


2025-12-06 00:59.08 [info     ] TD3_20251206002429: epoch=102 step=102000 epoch=102 metrics={'time_sample_batch': 0.004597495794296264, 'time_algorithm_update': 0.011268258810043334, 'critic_loss': 1309804189057.024, 'actor_loss': -4200642.7095, 'time_step': 0.01609429359436035, 'td_error': 685316230770.8827, 'value_scale': 4150110.109492875, 'discounted_advantage': -6152592.735003861, 'initial_state': 4683645.0, 'diff_eval': 113474.30846605197} step=102000
2025-12-06 00:59.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_102000.d3


Epoch 103/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.03it/s, critic_loss=1.63e+12, actor_loss=-4.67e+6]


2025-12-06 00:59.29 [info     ] TD3_20251206002429: epoch=103 step=103000 epoch=103 metrics={'time_sample_batch': 0.004631810188293457, 'time_algorithm_update': 0.012333840131759644, 'critic_loss': 1633448975925.248, 'actor_loss': -4668864.044, 'time_step': 0.017220149040222166, 'td_error': 846260594464.0369, 'value_scale': 4606166.741932104, 'discounted_advantage': -6807645.936054238, 'initial_state': 5217414.5, 'diff_eval': 113474.30846605197} step=103000
2025-12-06 00:59.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_103000.d3


Epoch 104/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.08it/s, critic_loss=2.03e+12, actor_loss=-5.18e+6]


2025-12-06 00:59.49 [info     ] TD3_20251206002429: epoch=104 step=104000 epoch=104 metrics={'time_sample_batch': 0.004712464809417724, 'time_algorithm_update': 0.011111824750900268, 'critic_loss': 2034856510488.576, 'actor_loss': -5178933.157, 'time_step': 0.016068360328674317, 'td_error': 1050358772053.409, 'value_scale': 5096441.787720034, 'discounted_advantage': -7677324.569851024, 'initial_state': 5788685.5, 'diff_eval': 113474.30846605197} step=104000
2025-12-06 00:59.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_104000.d3


Epoch 105/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.16it/s, critic_loss=2.52e+12, actor_loss=-5.74e+6]


2025-12-06 01:00.09 [info     ] TD3_20251206002429: epoch=105 step=105000 epoch=105 metrics={'time_sample_batch': 0.004467229843139648, 'time_algorithm_update': 0.01134432029724121, 'critic_loss': 2521500251324.416, 'actor_loss': -5739525.378, 'time_step': 0.01605065417289734, 'td_error': 1280424653945.751, 'value_scale': 5642200.592937971, 'discounted_advantage': -8327960.407156299, 'initial_state': 6433670.0, 'diff_eval': 113474.30846605197} step=105000
2025-12-06 01:00.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_105000.d3


Epoch 106/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.84it/s, critic_loss=3.1e+12, actor_loss=-6.34e+6]


2025-12-06 01:00.29 [info     ] TD3_20251206002429: epoch=106 step=106000 epoch=106 metrics={'time_sample_batch': 0.004723454475402832, 'time_algorithm_update': 0.011469048738479615, 'critic_loss': 3104970935107.584, 'actor_loss': -6338011.923, 'time_step': 0.016427246809005737, 'td_error': 1559090022372.9639, 'value_scale': 6207304.029756916, 'discounted_advantage': -9213179.284715421, 'initial_state': 7100449.5, 'diff_eval': 113474.30846605197} step=106000
2025-12-06 01:00.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_106000.d3


Epoch 107/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.93it/s, critic_loss=3.79e+12, actor_loss=-6.97e+6]


2025-12-06 01:00.49 [info     ] TD3_20251206002429: epoch=107 step=107000 epoch=107 metrics={'time_sample_batch': 0.004577237606048584, 'time_algorithm_update': 0.011276955127716065, 'critic_loss': 3790795114807.296, 'actor_loss': -6975157.666, 'time_step': 0.016109057903289793, 'td_error': 1892444127230.0654, 'value_scale': 6830001.289501257, 'discounted_advantage': -10081260.914710978, 'initial_state': 7830651.0, 'diff_eval': 113474.30846605197} step=107000
2025-12-06 01:00.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_107000.d3


Epoch 108/200: 100%|██████████| 1000/1000 [00:16<00:00, 58.90it/s, critic_loss=4.59e+12, actor_loss=-7.66e+6]


2025-12-06 01:01.09 [info     ] TD3_20251206002429: epoch=108 step=108000 epoch=108 metrics={'time_sample_batch': 0.004745946884155273, 'time_algorithm_update': 0.011667171716690064, 'critic_loss': 4590622922506.24, 'actor_loss': -7666220.67, 'time_step': 0.016655727386474608, 'td_error': 2291565220178.0127, 'value_scale': 7487148.37992456, 'discounted_advantage': -11181783.359634364, 'initial_state': 8612828.0, 'diff_eval': 113474.30846605197} step=108000
2025-12-06 01:01.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_108000.d3


Epoch 109/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.30it/s, critic_loss=5.62e+12, actor_loss=-8.4e+6]


2025-12-06 01:01.29 [info     ] TD3_20251206002429: epoch=109 step=109000 epoch=109 metrics={'time_sample_batch': 0.004747979640960694, 'time_algorithm_update': 0.011536628007888794, 'critic_loss': 5626511196684.288, 'actor_loss': -8407480.786, 'time_step': 0.016543818712234495, 'td_error': 2751339394491.2656, 'value_scale': 8207834.1986588435, 'discounted_advantage': -12098813.11968958, 'initial_state': 9465009.0, 'diff_eval': 113474.30846605197} step=109000
2025-12-06 01:01.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_109000.d3


Epoch 110/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.88it/s, critic_loss=6.77e+12, actor_loss=-9.2e+6]


2025-12-06 01:01.50 [info     ] TD3_20251206002429: epoch=110 step=110000 epoch=110 metrics={'time_sample_batch': 0.00461560320854187, 'time_algorithm_update': 0.011475226163864136, 'critic_loss': 6773091157147.648, 'actor_loss': -9206454.464, 'time_step': 0.016370354413986207, 'td_error': 3306345550346.367, 'value_scale': 8983982.55217938, 'discounted_advantage': -13257244.637474598, 'initial_state': 10389459.0, 'diff_eval': 113474.30846605197} step=110000
2025-12-06 01:01.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_110000.d3


Epoch 111/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.51it/s, critic_loss=8.12e+12, actor_loss=-1.01e+7]


2025-12-06 01:02.10 [info     ] TD3_20251206002429: epoch=111 step=111000 epoch=111 metrics={'time_sample_batch': 0.004743834018707275, 'time_algorithm_update': 0.011492779731750488, 'critic_loss': 8136679266451.456, 'actor_loss': -10075257.828, 'time_step': 0.01649190306663513, 'td_error': 3928637703761.2944, 'value_scale': 9807717.042539816, 'discounted_advantage': -14297311.002593627, 'initial_state': 11377628.0, 'diff_eval': 113474.30846605197} step=111000
2025-12-06 01:02.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_111000.d3


Epoch 112/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.55it/s, critic_loss=9.58e+12, actor_loss=-1.1e+7]


2025-12-06 01:02.30 [info     ] TD3_20251206002429: epoch=112 step=112000 epoch=112 metrics={'time_sample_batch': 0.004645050048828125, 'time_algorithm_update': 0.011585059404373169, 'critic_loss': 9586270759550.977, 'actor_loss': -10983973.518, 'time_step': 0.01647512626647949, 'td_error': 4665697547560.096, 'value_scale': 10671177.089480301, 'discounted_advantage': -15525034.380116718, 'initial_state': 12405496.0, 'diff_eval': 113474.30846605197} step=112000
2025-12-06 01:02.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_112000.d3


Epoch 113/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.64it/s, critic_loss=1.15e+13, actor_loss=-1.19e+7]


2025-12-06 01:02.50 [info     ] TD3_20251206002429: epoch=113 step=113000 epoch=113 metrics={'time_sample_batch': 0.004763547420501709, 'time_algorithm_update': 0.011691675424575805, 'critic_loss': 11525802582081.535, 'actor_loss': -11942084.968, 'time_step': 0.01673080325126648, 'td_error': 5541414969912.642, 'value_scale': 11585989.505238894, 'discounted_advantage': -17083946.645810436, 'initial_state': 13510308.0, 'diff_eval': 113474.30846605197} step=113000
2025-12-06 01:02.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_113000.d3


Epoch 114/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.67it/s, critic_loss=1.36e+13, actor_loss=-1.3e+7]


2025-12-06 01:03.10 [info     ] TD3_20251206002429: epoch=114 step=114000 epoch=114 metrics={'time_sample_batch': 0.004475100994110108, 'time_algorithm_update': 0.011173689365386963, 'critic_loss': 13587329278017.535, 'actor_loss': -12970063.784, 'time_step': 0.01589796471595764, 'td_error': 6513616081026.746, 'value_scale': 12560369.912824811, 'discounted_advantage': -18395216.399805784, 'initial_state': 14677842.0, 'diff_eval': 113474.30846605197} step=114000
2025-12-06 01:03.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_114000.d3


Epoch 115/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.59it/s, critic_loss=1.61e+13, actor_loss=-1.41e+7]


2025-12-06 01:03.30 [info     ] TD3_20251206002429: epoch=115 step=115000 epoch=115 metrics={'time_sample_batch': 0.0044654374122619625, 'time_algorithm_update': 0.011455538034439086, 'critic_loss': 16119461601869.824, 'actor_loss': -14055081.992, 'time_step': 0.01618358826637268, 'td_error': 7635092695892.246, 'value_scale': 13601708.27221291, 'discounted_advantage': -19741966.236848593, 'initial_state': 15928489.0, 'diff_eval': 113474.30846605197} step=115000
2025-12-06 01:03.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_115000.d3


Epoch 116/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.76it/s, critic_loss=1.9e+13, actor_loss=-1.52e+7]


2025-12-06 01:03.49 [info     ] TD3_20251206002429: epoch=116 step=116000 epoch=116 metrics={'time_sample_batch': 0.0045803308486938475, 'time_algorithm_update': 0.011314776182174682, 'critic_loss': 19044177228595.2, 'actor_loss': -15208863.822, 'time_step': 0.016151308059692383, 'td_error': 8987617949165.438, 'value_scale': 14700082.116722548, 'discounted_advantage': -21655669.13084257, 'initial_state': 17260892.0, 'diff_eval': 113474.30846605197} step=116000
2025-12-06 01:03.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_116000.d3


Epoch 117/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.55it/s, critic_loss=2.22e+13, actor_loss=-1.64e+7]


2025-12-06 01:04.10 [info     ] TD3_20251206002429: epoch=117 step=117000 epoch=117 metrics={'time_sample_batch': 0.004805985450744629, 'time_algorithm_update': 0.011681772708892823, 'critic_loss': 22218942766383.105, 'actor_loss': -16424524.414, 'time_step': 0.016749650478363038, 'td_error': 10413861810682.188, 'value_scale': 15866600.494341996, 'discounted_advantage': -22935020.491053842, 'initial_state': 18668968.0, 'diff_eval': 113474.30846605197} step=117000
2025-12-06 01:04.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_117000.d3


Epoch 118/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.72it/s, critic_loss=2.6e+13, actor_loss=-1.77e+7]


2025-12-06 01:04.30 [info     ] TD3_20251206002429: epoch=118 step=118000 epoch=118 metrics={'time_sample_batch': 0.004591511487960815, 'time_algorithm_update': 0.011309345483779907, 'critic_loss': 26016470915874.816, 'actor_loss': -17726145.956, 'time_step': 0.016160167932510378, 'td_error': 12121035441337.637, 'value_scale': 17115614.674350377, 'discounted_advantage': -24591027.54544616, 'initial_state': 20177118.0, 'diff_eval': 113474.30846605197} step=118000
2025-12-06 01:04.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_118000.d3


Epoch 119/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.25it/s, critic_loss=3.01e+13, actor_loss=-1.91e+7]


2025-12-06 01:04.50 [info     ] TD3_20251206002429: epoch=119 step=119000 epoch=119 metrics={'time_sample_batch': 0.004588785648345947, 'time_algorithm_update': 0.01144578719139099, 'critic_loss': 30143380916797.44, 'actor_loss': -19100187.032, 'time_step': 0.016286511659622194, 'td_error': 14069697711657.412, 'value_scale': 18389519.875943, 'discounted_advantage': -26691466.613851156, 'initial_state': 21735588.0, 'diff_eval': 113474.30846605197} step=119000
2025-12-06 01:04.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_119000.d3


Epoch 120/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.81it/s, critic_loss=3.52e+13, actor_loss=-2.05e+7]


2025-12-06 01:05.10 [info     ] TD3_20251206002429: epoch=120 step=120000 epoch=120 metrics={'time_sample_batch': 0.00467818021774292, 'time_algorithm_update': 0.011487828016281128, 'critic_loss': 35298106891304.96, 'actor_loss': -20506962.236, 'time_step': 0.016404518842697145, 'td_error': 16251321465352.965, 'value_scale': 19761574.950544845, 'discounted_advantage': -28564561.29338895, 'initial_state': 23410680.0, 'diff_eval': 113474.30846605197} step=120000
2025-12-06 01:05.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_120000.d3


Epoch 121/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.04it/s, critic_loss=4.06e+13, actor_loss=-2.2e+7]


2025-12-06 01:05.30 [info     ] TD3_20251206002429: epoch=121 step=121000 epoch=121 metrics={'time_sample_batch': 0.004578615665435791, 'time_algorithm_update': 0.011261215686798096, 'critic_loss': 40613021870981.12, 'actor_loss': -22037962.424, 'time_step': 0.01608275008201599, 'td_error': 18768121417950.082, 'value_scale': 21216709.907376364, 'discounted_advantage': -30604285.629849263, 'initial_state': 25177542.0, 'diff_eval': 113474.30846605197} step=121000
2025-12-06 01:05.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_121000.d3


Epoch 122/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.79it/s, critic_loss=4.71e+13, actor_loss=-2.36e+7]


2025-12-06 01:05.50 [info     ] TD3_20251206002429: epoch=122 step=122000 epoch=122 metrics={'time_sample_batch': 0.004825631618499756, 'time_algorithm_update': 0.011618282794952393, 'critic_loss': 47139110298583.04, 'actor_loss': -23652738.58, 'time_step': 0.01668829393386841, 'td_error': 21705069849087.31, 'value_scale': 22727766.699497066, 'discounted_advantage': -33235236.83993374, 'initial_state': 27019884.0, 'diff_eval': 113474.30846605197} step=122000
2025-12-06 01:05.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_122000.d3


Epoch 123/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.62it/s, critic_loss=5.41e+13, actor_loss=-2.54e+7]


2025-12-06 01:06.11 [info     ] TD3_20251206002429: epoch=123 step=123000 epoch=123 metrics={'time_sample_batch': 0.004845362424850464, 'time_algorithm_update': 0.011654098987579346, 'critic_loss': 54149803275190.27, 'actor_loss': -25368754.356, 'time_step': 0.016743030071258543, 'td_error': 24764310395874.938, 'value_scale': 24366414.00544845, 'discounted_advantage': -34856846.489534825, 'initial_state': 29028684.0, 'diff_eval': 113474.30846605197} step=123000
2025-12-06 01:06.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_123000.d3


Epoch 124/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.38it/s, critic_loss=6.19e+13, actor_loss=-2.71e+7]


2025-12-06 01:06.31 [info     ] TD3_20251206002429: epoch=124 step=124000 epoch=124 metrics={'time_sample_batch': 0.004597633838653564, 'time_algorithm_update': 0.01143079972267151, 'critic_loss': 61994676470677.51, 'actor_loss': -27125345.804, 'time_step': 0.01626476287841797, 'td_error': 28354356184623.043, 'value_scale': 26010904.651299246, 'discounted_advantage': -37500328.31899817, 'initial_state': 31051504.0, 'diff_eval': 113474.30846605197} step=124000
2025-12-06 01:06.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_124000.d3


Epoch 125/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.40it/s, critic_loss=7.17e+13, actor_loss=-2.9e+7]


2025-12-06 01:06.50 [info     ] TD3_20251206002429: epoch=125 step=125000 epoch=125 metrics={'time_sample_batch': 0.004490774869918823, 'time_algorithm_update': 0.011240577220916748, 'critic_loss': 71717755107147.78, 'actor_loss': -28989108.872, 'time_step': 0.015974891662597655, 'td_error': 32435888028223.28, 'value_scale': 27784772.09597653, 'discounted_advantage': -40130296.62300401, 'initial_state': 33221008.0, 'diff_eval': 113474.30846605197} step=125000
2025-12-06 01:06.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_125000.d3


Epoch 126/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.92it/s, critic_loss=8.15e+13, actor_loss=-3.09e+7]


2025-12-06 01:07.10 [info     ] TD3_20251206002429: epoch=126 step=126000 epoch=126 metrics={'time_sample_batch': 0.004683969259262085, 'time_algorithm_update': 0.011464811086654663, 'critic_loss': 81526032477192.19, 'actor_loss': -30946562.432, 'time_step': 0.01638635230064392, 'td_error': 36956507783501.02, 'value_scale': 29640348.46102263, 'discounted_advantage': -42767266.38361409, 'initial_state': 35503136.0, 'diff_eval': 113474.30846605197} step=126000
2025-12-06 01:07.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_126000.d3


Epoch 127/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.72it/s, critic_loss=9.29e+13, actor_loss=-3.3e+7]


2025-12-06 01:07.31 [info     ] TD3_20251206002429: epoch=127 step=127000 epoch=127 metrics={'time_sample_batch': 0.004737250804901123, 'time_algorithm_update': 0.012055620193481446, 'critic_loss': 92950523791015.94, 'actor_loss': -32992049.616, 'time_step': 0.0170272696018219, 'td_error': 41985443550010.91, 'value_scale': 31591285.391031016, 'discounted_advantage': -45456107.50303023, 'initial_state': 37911216.0, 'diff_eval': 113474.30846605197} step=127000
2025-12-06 01:07.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_127000.d3


Epoch 128/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.39it/s, critic_loss=1.06e+14, actor_loss=-3.52e+7]


2025-12-06 01:07.51 [info     ] TD3_20251206002429: epoch=128 step=128000 epoch=128 metrics={'time_sample_batch': 0.004642861366271973, 'time_algorithm_update': 0.011929465055465698, 'critic_loss': 106066513743577.1, 'actor_loss': -35162419.008, 'time_step': 0.01682452630996704, 'td_error': 47663199499076.41, 'value_scale': 33630345.64375524, 'discounted_advantage': -48391704.239388384, 'initial_state': 40429032.0, 'diff_eval': 113474.30846605197} step=128000
2025-12-06 01:07.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_128000.d3


Epoch 129/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.79it/s, critic_loss=1.19e+14, actor_loss=-3.74e+7]


2025-12-06 01:08.11 [info     ] TD3_20251206002429: epoch=129 step=129000 epoch=129 metrics={'time_sample_batch': 0.004441365003585816, 'time_algorithm_update': 0.011191547632217407, 'critic_loss': 119394137668583.42, 'actor_loss': -37424646.48, 'time_step': 0.015886372804641724, 'td_error': 53736404366810.16, 'value_scale': 35786942.594300084, 'discounted_advantage': -50605944.17065546, 'initial_state': 43078532.0, 'diff_eval': 113474.30846605197} step=129000
2025-12-06 01:08.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_129000.d3


Epoch 130/200: 100%|██████████| 1000/1000 [00:16<00:00, 62.03it/s, critic_loss=1.37e+14, actor_loss=-3.98e+7]


2025-12-06 01:08.31 [info     ] TD3_20251206002429: epoch=130 step=130000 epoch=130 metrics={'time_sample_batch': 0.0045199525356292725, 'time_algorithm_update': 0.0110482017993927, 'critic_loss': 137188405673984.0, 'actor_loss': -39772644.504, 'time_step': 0.015813828468322754, 'td_error': 60958134416375.2, 'value_scale': 37964950.07879296, 'discounted_advantage': -54764273.45340801, 'initial_state': 45782284.0, 'diff_eval': 113474.30846605197} step=130000
2025-12-06 01:08.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_130000.d3


Epoch 131/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.07it/s, critic_loss=1.55e+14, actor_loss=-4.22e+7]


2025-12-06 01:08.51 [info     ] TD3_20251206002429: epoch=131 step=131000 epoch=131 metrics={'time_sample_batch': 0.00473624300956726, 'time_algorithm_update': 0.011629735946655274, 'critic_loss': 154670094885060.6, 'actor_loss': -42225071.016, 'time_step': 0.01661656093597412, 'td_error': 68925579186118.305, 'value_scale': 40293064.34199497, 'discounted_advantage': -58485935.0420358, 'initial_state': 48678932.0, 'diff_eval': 113474.30846605197} step=131000
2025-12-06 01:08.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_131000.d3


Epoch 132/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.02it/s, critic_loss=1.75e+14, actor_loss=-4.48e+7]


2025-12-06 01:09.11 [info     ] TD3_20251206002429: epoch=132 step=132000 epoch=132 metrics={'time_sample_batch': 0.004483300924301148, 'time_algorithm_update': 0.01135584306716919, 'critic_loss': 175108757250899.97, 'actor_loss': -44827909.472, 'time_step': 0.01608454751968384, 'td_error': 77332632230634.22, 'value_scale': 42795421.79463537, 'discounted_advantage': -61041348.35138234, 'initial_state': 51782616.0, 'diff_eval': 113474.30846605197} step=132000
2025-12-06 01:09.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_132000.d3


Epoch 133/200: 100%|██████████| 1000/1000 [00:18<00:00, 55.54it/s, critic_loss=1.96e+14, actor_loss=-4.76e+7]


2025-12-06 01:09.32 [info     ] TD3_20251206002429: epoch=133 step=133000 epoch=133 metrics={'time_sample_batch': 0.0049009954929351805, 'time_algorithm_update': 0.012533878087997436, 'critic_loss': 196024755302694.9, 'actor_loss': -47574065.016, 'time_step': 0.017693581342697145, 'td_error': 87035316289510.95, 'value_scale': 45346742.87342833, 'discounted_advantage': -64805964.405380696, 'initial_state': 54943220.0, 'diff_eval': 113474.30846605197} step=133000
2025-12-06 01:09.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_133000.d3


Epoch 134/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.27it/s, critic_loss=2.21e+14, actor_loss=-5.04e+7]


2025-12-06 01:09.52 [info     ] TD3_20251206002429: epoch=134 step=134000 epoch=134 metrics={'time_sample_batch': 0.004640030384063721, 'time_algorithm_update': 0.011426327466964722, 'critic_loss': 220795957078392.84, 'actor_loss': -50377971.2, 'time_step': 0.016304609537124634, 'td_error': 97847950049169.67, 'value_scale': 48054613.90360436, 'discounted_advantage': -68686059.75963132, 'initial_state': 58308120.0, 'diff_eval': 113474.30846605197} step=134000
2025-12-06 01:09.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_134000.d3


Epoch 135/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.17it/s, critic_loss=2.49e+14, actor_loss=-5.34e+7]


2025-12-06 01:10.12 [info     ] TD3_20251206002429: epoch=135 step=135000 epoch=135 metrics={'time_sample_batch': 0.004796706676483155, 'time_algorithm_update': 0.011524563550949097, 'critic_loss': 248584245671362.56, 'actor_loss': -53385220.184, 'time_step': 0.016582876682281495, 'td_error': 109770724003132.03, 'value_scale': 50932527.347862534, 'discounted_advantage': -72201329.18214169, 'initial_state': 61886084.0, 'diff_eval': 113474.30846605197} step=135000
2025-12-06 01:10.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_135000.d3


Epoch 136/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.16it/s, critic_loss=2.79e+14, actor_loss=-5.65e+7]


2025-12-06 01:10.32 [info     ] TD3_20251206002429: epoch=136 step=136000 epoch=136 metrics={'time_sample_batch': 0.00458515453338623, 'time_algorithm_update': 0.011243086338043213, 'critic_loss': 278712431985819.66, 'actor_loss': -56556737.992, 'time_step': 0.016059349060058594, 'td_error': 123114187175362.2, 'value_scale': 53872955.504610226, 'discounted_advantage': -76750623.3822302, 'initial_state': 65561540.0, 'diff_eval': 113474.30846605197} step=136000
2025-12-06 01:10.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_136000.d3


Epoch 137/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.80it/s, critic_loss=3.14e+14, actor_loss=-5.98e+7]


2025-12-06 01:10.52 [info     ] TD3_20251206002429: epoch=137 step=137000 epoch=137 metrics={'time_sample_batch': 0.0046104793548583985, 'time_algorithm_update': 0.011307434797286988, 'critic_loss': 314450172761341.94, 'actor_loss': -59818538.928, 'time_step': 0.016155507326126098, 'td_error': 137764670371989.89, 'value_scale': 56919363.79631182, 'discounted_advantage': -81358054.85524602, 'initial_state': 69369120.0, 'diff_eval': 113474.30846605197} step=137000
2025-12-06 01:10.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_137000.d3


Epoch 138/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.67it/s, critic_loss=3.47e+14, actor_loss=-6.32e+7]


2025-12-06 01:11.12 [info     ] TD3_20251206002429: epoch=138 step=138000 epoch=138 metrics={'time_sample_batch': 0.004574001550674439, 'time_algorithm_update': 0.011370205879211425, 'critic_loss': 347057100683214.9, 'actor_loss': -63168189.824, 'time_step': 0.016186323165893555, 'td_error': 153397079255112.62, 'value_scale': 60039914.62112322, 'discounted_advantage': -85744408.64487883, 'initial_state': 73240368.0, 'diff_eval': 113474.30846605197} step=138000
2025-12-06 01:11.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_138000.d3


Epoch 139/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.36it/s, critic_loss=3.93e+14, actor_loss=-6.66e+7]


2025-12-06 01:11.31 [info     ] TD3_20251206002429: epoch=139 step=139000 epoch=139 metrics={'time_sample_batch': 0.004507941246032715, 'time_algorithm_update': 0.011256171941757202, 'critic_loss': 393084077646807.06, 'actor_loss': -66661876.52, 'time_step': 0.016008002519607545, 'td_error': 171153828953915.8, 'value_scale': 63324433.64459346, 'discounted_advantage': -90724493.05790406, 'initial_state': 77355304.0, 'diff_eval': 113474.30846605197} step=139000
2025-12-06 01:11.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_139000.d3


Epoch 140/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.92it/s, critic_loss=4.36e+14, actor_loss=-7.03e+7]


2025-12-06 01:11.51 [info     ] TD3_20251206002429: epoch=140 step=140000 epoch=140 metrics={'time_sample_batch': 0.004489944219589233, 'time_algorithm_update': 0.011125164985656738, 'critic_loss': 435994269472260.1, 'actor_loss': -70273669.088, 'time_step': 0.01584834861755371, 'td_error': 189867419828527.22, 'value_scale': 66784466.60016765, 'discounted_advantage': -94911641.88995308, 'initial_state': 81695792.0, 'diff_eval': 113474.30846605197} step=140000
2025-12-06 01:11.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_140000.d3


Epoch 141/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.06it/s, critic_loss=4.83e+14, actor_loss=-7.4e+7]


2025-12-06 01:12.11 [info     ] TD3_20251206002429: epoch=141 step=141000 epoch=141 metrics={'time_sample_batch': 0.004730683326721191, 'time_algorithm_update': 0.011371158599853515, 'critic_loss': 482849067542511.6, 'actor_loss': -74004209.056, 'time_step': 0.016352610349655152, 'td_error': 210648802002269.22, 'value_scale': 70363231.1349539, 'discounted_advantage': -99607823.42358665, 'initial_state': 86191944.0, 'diff_eval': 113474.30846605197} step=141000
2025-12-06 01:12.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_141000.d3


Epoch 142/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.23it/s, critic_loss=5.42e+14, actor_loss=-7.79e+7]


2025-12-06 01:12.31 [info     ] TD3_20251206002429: epoch=142 step=142000 epoch=142 metrics={'time_sample_batch': 0.00461708664894104, 'time_algorithm_update': 0.011447224378585816, 'critic_loss': 542172346899759.1, 'actor_loss': -77938404.608, 'time_step': 0.01631417655944824, 'td_error': 234062097684960.1, 'value_scale': 74003733.53813915, 'discounted_advantage': -105690672.07590964, 'initial_state': 90739776.0, 'diff_eval': 113474.30846605197} step=142000
2025-12-06 01:12.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_142000.d3


Epoch 143/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.41it/s, critic_loss=5.97e+14, actor_loss=-8.2e+7]


2025-12-06 01:12.51 [info     ] TD3_20251206002429: epoch=143 step=143000 epoch=143 metrics={'time_sample_batch': 0.004626408100128174, 'time_algorithm_update': 0.011377952814102173, 'critic_loss': 597509420619923.5, 'actor_loss': -82005041.568, 'time_step': 0.016252274513244628, 'td_error': 259104758959807.62, 'value_scale': 77983543.24979044, 'discounted_advantage': -110014219.01498319, 'initial_state': 95738432.0, 'diff_eval': 113474.30846605197} step=143000
2025-12-06 01:12.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_143000.d3


Epoch 144/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.00it/s, critic_loss=6.61e+14, actor_loss=-8.63e+7]


2025-12-06 01:13.11 [info     ] TD3_20251206002429: epoch=144 step=144000 epoch=144 metrics={'time_sample_batch': 0.0045728588104248045, 'time_algorithm_update': 0.011283929109573364, 'critic_loss': 661634248112865.2, 'actor_loss': -86330635.312, 'time_step': 0.016100772380828857, 'td_error': 286966239060239.7, 'value_scale': 81905862.38055323, 'discounted_advantage': -116615415.1844353, 'initial_state': 100672376.0, 'diff_eval': 113474.30846605197} step=144000
2025-12-06 01:13.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_144000.d3


Epoch 145/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.90it/s, critic_loss=7.35e+14, actor_loss=-9.08e+7]


2025-12-06 01:13.31 [info     ] TD3_20251206002429: epoch=145 step=145000 epoch=145 metrics={'time_sample_batch': 0.00468229079246521, 'time_algorithm_update': 0.011480811357498169, 'critic_loss': 735664393055895.5, 'actor_loss': -90775206.544, 'time_step': 0.0164035120010376, 'td_error': 316353722052215.25, 'value_scale': 86054578.56663872, 'discounted_advantage': -121897656.1864322, 'initial_state': 105894664.0, 'diff_eval': 113474.30846605197} step=145000
2025-12-06 01:13.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_145000.d3


Epoch 146/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.23it/s, critic_loss=8.06e+14, actor_loss=-9.52e+7]


2025-12-06 01:13.51 [info     ] TD3_20251206002429: epoch=146 step=146000 epoch=146 metrics={'time_sample_batch': 0.004659011840820313, 'time_algorithm_update': 0.011370602130889893, 'critic_loss': 806433214645141.5, 'actor_loss': -95214619.344, 'time_step': 0.01628536891937256, 'td_error': 348549233170580.6, 'value_scale': 90335597.47191954, 'discounted_advantage': -127401812.51561229, 'initial_state': 111297864.0, 'diff_eval': 113474.30846605197} step=146000
2025-12-06 01:13.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_146000.d3


Epoch 147/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.81it/s, critic_loss=8.93e+14, actor_loss=-9.99e+7]


2025-12-06 01:14.10 [info     ] TD3_20251206002429: epoch=147 step=147000 epoch=147 metrics={'time_sample_batch': 0.004483269453048706, 'time_algorithm_update': 0.011155807256698609, 'critic_loss': 892908281972916.2, 'actor_loss': -99933388.4, 'time_step': 0.01587721109390259, 'td_error': 383665364186230.0, 'value_scale': 94734050.19111484, 'discounted_advantage': -133716164.0146115, 'initial_state': 116837472.0, 'diff_eval': 113474.30846605197} step=147000
2025-12-06 01:14.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_147000.d3


Epoch 148/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.72it/s, critic_loss=9.7e+14, actor_loss=-1.05e+8]


2025-12-06 01:14.30 [info     ] TD3_20251206002429: epoch=148 step=148000 epoch=148 metrics={'time_sample_batch': 0.004511182308197022, 'time_algorithm_update': 0.01112502908706665, 'critic_loss': 968909186886795.2, 'actor_loss': -104722073.872, 'time_step': 0.015879333019256592, 'td_error': 420981334906515.5, 'value_scale': 99270186.01005867, 'discounted_advantage': -139494907.63021904, 'initial_state': 122545952.0, 'diff_eval': 113474.30846605197} step=148000
2025-12-06 01:14.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_148000.d3


Epoch 149/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.97it/s, critic_loss=1.07e+15, actor_loss=-1.1e+8]


2025-12-06 01:14.49 [info     ] TD3_20251206002429: epoch=149 step=149000 epoch=149 metrics={'time_sample_batch': 0.004529912471771241, 'time_algorithm_update': 0.011085602283477783, 'critic_loss': 1071460303146844.1, 'actor_loss': -109768943.888, 'time_step': 0.015853479385375978, 'td_error': 462448705404886.94, 'value_scale': 103913079.89270747, 'discounted_advantage': -146894957.94429106, 'initial_state': 128388608.0, 'diff_eval': 113474.30846605197} step=149000
2025-12-06 01:14.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_149000.d3


Epoch 150/200: 100%|██████████| 1000/1000 [00:16<00:00, 62.15it/s, critic_loss=1.18e+15, actor_loss=-1.15e+8]


2025-12-06 01:15.09 [info     ] TD3_20251206002429: epoch=150 step=150000 epoch=150 metrics={'time_sample_batch': 0.004368850946426392, 'time_algorithm_update': 0.01111436915397644, 'critic_loss': 1176897830032244.8, 'actor_loss': -114970645.088, 'time_step': 0.01575755763053894, 'td_error': 507295109332537.2, 'value_scale': 108882876.83487007, 'discounted_advantage': -153134452.2413497, 'initial_state': 134654368.0, 'diff_eval': 113474.30846605197} step=150000
2025-12-06 01:15.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_150000.d3


Epoch 151/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.41it/s, critic_loss=1.3e+15, actor_loss=-1.2e+8] 


2025-12-06 01:15.30 [info     ] TD3_20251206002429: epoch=151 step=151000 epoch=151 metrics={'time_sample_batch': 0.004893719911575317, 'time_algorithm_update': 0.011895040988922119, 'critic_loss': 1300370163299778.5, 'actor_loss': -120293997.92, 'time_step': 0.017066274404525756, 'td_error': 556291893923497.2, 'value_scale': 113813679.65465213, 'discounted_advantage': -161530143.55415434, 'initial_state': 140873808.0, 'diff_eval': 113474.30846605197} step=151000
2025-12-06 01:15.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_151000.d3


Epoch 152/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.65it/s, critic_loss=1.42e+15, actor_loss=-1.26e+8]


2025-12-06 01:15.50 [info     ] TD3_20251206002429: epoch=152 step=152000 epoch=152 metrics={'time_sample_batch': 0.004607468366622925, 'time_algorithm_update': 0.011330414056777954, 'critic_loss': 1417266343425605.8, 'actor_loss': -125799682.832, 'time_step': 0.01617858362197876, 'td_error': 609016501743974.9, 'value_scale': 119088224.33696564, 'discounted_advantage': -168680548.45531592, 'initial_state': 147535152.0, 'diff_eval': 113474.30846605197} step=152000
2025-12-06 01:15.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_152000.d3


Epoch 153/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.82it/s, critic_loss=1.55e+15, actor_loss=-1.32e+8]


2025-12-06 01:16.09 [info     ] TD3_20251206002429: epoch=153 step=153000 epoch=153 metrics={'time_sample_batch': 0.004500354766845703, 'time_algorithm_update': 0.011152155876159668, 'critic_loss': 1548028531596853.2, 'actor_loss': -131711564.496, 'time_step': 0.015889450073242186, 'td_error': 665134252970018.0, 'value_scale': 124605318.4559933, 'discounted_advantage': -174855474.96376052, 'initial_state': 154488576.0, 'diff_eval': 113474.30846605197} step=153000
2025-12-06 01:16.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_153000.d3


Epoch 154/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.30it/s, critic_loss=1.71e+15, actor_loss=-1.38e+8]


2025-12-06 01:16.29 [info     ] TD3_20251206002429: epoch=154 step=154000 epoch=154 metrics={'time_sample_batch': 0.004790424823760986, 'time_algorithm_update': 0.011797876834869384, 'critic_loss': 1707919831476469.8, 'actor_loss': -137696889.856, 'time_step': 0.016833747148513795, 'td_error': 727467272092924.8, 'value_scale': 130187609.15004191, 'discounted_advantage': -183486610.76271752, 'initial_state': 161596096.0, 'diff_eval': 113474.30846605197} step=154000
2025-12-06 01:16.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_154000.d3


Epoch 155/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.81it/s, critic_loss=1.86e+15, actor_loss=-1.44e+8]


2025-12-06 01:16.50 [info     ] TD3_20251206002429: epoch=155 step=155000 epoch=155 metrics={'time_sample_batch': 0.0047626843452453615, 'time_algorithm_update': 0.011916377782821655, 'critic_loss': 1857862622104256.5, 'actor_loss': -143835899.648, 'time_step': 0.016943785667419435, 'td_error': 792158451617378.1, 'value_scale': 135984371.7988265, 'discounted_advantage': -190148387.58580136, 'initial_state': 168914784.0, 'diff_eval': 113474.30846605197} step=155000
2025-12-06 01:16.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_155000.d3


Epoch 156/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.01it/s, critic_loss=2.03e+15, actor_loss=-1.5e+8]


2025-12-06 01:17.10 [info     ] TD3_20251206002429: epoch=156 step=156000 epoch=156 metrics={'time_sample_batch': 0.004685813426971435, 'time_algorithm_update': 0.011433998346328736, 'critic_loss': 2027257867962155.0, 'actor_loss': -150209477.92, 'time_step': 0.016364600181579588, 'td_error': 864692583168968.0, 'value_scale': 141814268.48784578, 'discounted_advantage': -200143789.36375716, 'initial_state': 176301632.0, 'diff_eval': 113474.30846605197} step=156000
2025-12-06 01:17.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_156000.d3


Epoch 157/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.74it/s, critic_loss=2.22e+15, actor_loss=-1.57e+8]


2025-12-06 01:17.30 [info     ] TD3_20251206002429: epoch=157 step=157000 epoch=157 metrics={'time_sample_batch': 0.004605913400650024, 'time_algorithm_update': 0.01131492257118225, 'critic_loss': 2224920727365091.2, 'actor_loss': -156630630.912, 'time_step': 0.01615942645072937, 'td_error': 943126649567279.8, 'value_scale': 147940323.43168482, 'discounted_advantage': -209629608.4487443, 'initial_state': 184036768.0, 'diff_eval': 113474.30846605197} step=157000
2025-12-06 01:17.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_157000.d3


Epoch 158/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.20it/s, critic_loss=2.43e+15, actor_loss=-1.63e+8]


2025-12-06 01:17.50 [info     ] TD3_20251206002429: epoch=158 step=158000 epoch=158 metrics={'time_sample_batch': 0.004608646154403686, 'time_algorithm_update': 0.011461953401565552, 'critic_loss': 2426171374142750.5, 'actor_loss': -163375079.68, 'time_step': 0.016311748504638672, 'td_error': 1026539700918986.4, 'value_scale': 154244044.44090527, 'discounted_advantage': -218875489.59928468, 'initial_state': 192018960.0, 'diff_eval': 113474.30846605197} step=158000
2025-12-06 01:17.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_158000.d3


Epoch 159/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.49it/s, critic_loss=2.64e+15, actor_loss=-1.7e+8]


2025-12-06 01:18.10 [info     ] TD3_20251206002429: epoch=159 step=159000 epoch=159 metrics={'time_sample_batch': 0.004603241682052612, 'time_algorithm_update': 0.011356121063232421, 'critic_loss': 2635494192909910.0, 'actor_loss': -170362492.032, 'time_step': 0.016217422723770143, 'td_error': 1116016542719698.2, 'value_scale': 160905338.52808046, 'discounted_advantage': -227246181.0540634, 'initial_state': 200501232.0, 'diff_eval': 113474.30846605197} step=159000
2025-12-06 01:18.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_159000.d3


Epoch 160/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.49it/s, critic_loss=2.87e+15, actor_loss=-1.78e+8]


2025-12-06 01:18.30 [info     ] TD3_20251206002429: epoch=160 step=160000 epoch=160 metrics={'time_sample_batch': 0.004506095170974732, 'time_algorithm_update': 0.011228768587112426, 'critic_loss': 2873267324004073.5, 'actor_loss': -177560394.048, 'time_step': 0.015971830368041992, 'td_error': 1213214827408288.8, 'value_scale': 167800591.80553228, 'discounted_advantage': -236206087.41983557, 'initial_state': 209300016.0, 'diff_eval': 113474.30846605197} step=160000
2025-12-06 01:18.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_160000.d3


Epoch 161/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.18it/s, critic_loss=3.12e+15, actor_loss=-1.85e+8]


2025-12-06 01:18.50 [info     ] TD3_20251206002429: epoch=161 step=161000 epoch=161 metrics={'time_sample_batch': 0.004513727903366089, 'time_algorithm_update': 0.011286318063735963, 'critic_loss': 3117317118828216.5, 'actor_loss': -185142971.04, 'time_step': 0.016048739194869995, 'td_error': 1317632206191212.8, 'value_scale': 174748976.42917016, 'discounted_advantage': -246690628.5183661, 'initial_state': 218086640.0, 'diff_eval': 113474.30846605197} step=161000
2025-12-06 01:18.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_161000.d3


Epoch 162/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.22it/s, critic_loss=3.33e+15, actor_loss=-1.93e+8]


2025-12-06 01:19.11 [info     ] TD3_20251206002429: epoch=162 step=162000 epoch=162 metrics={'time_sample_batch': 0.004663145542144776, 'time_algorithm_update': 0.012260606527328492, 'critic_loss': 3328201738967580.5, 'actor_loss': -192803124.0, 'time_step': 0.017172832012176512, 'td_error': 1425350028073970.2, 'value_scale': 181985240.22799665, 'discounted_advantage': -254515604.76238373, 'initial_state': 227302544.0, 'diff_eval': 113474.30846605197} step=162000
2025-12-06 01:19.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_162000.d3


Epoch 163/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.35it/s, critic_loss=3.66e+15, actor_loss=-2.01e+8]


2025-12-06 01:19.31 [info     ] TD3_20251206002429: epoch=163 step=163000 epoch=163 metrics={'time_sample_batch': 0.00470389199256897, 'time_algorithm_update': 0.01158761692047119, 'critic_loss': 3659052375361978.5, 'actor_loss': -200609794.4, 'time_step': 0.01653147053718567, 'td_error': 1544879075856354.0, 'value_scale': 189201755.8792959, 'discounted_advantage': -266554913.10015026, 'initial_state': 236528960.0, 'diff_eval': 113474.30846605197} step=163000
2025-12-06 01:19.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_163000.d3


Epoch 164/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.33it/s, critic_loss=3.94e+15, actor_loss=-2.09e+8]


2025-12-06 01:19.50 [info     ] TD3_20251206002429: epoch=164 step=164000 epoch=164 metrics={'time_sample_batch': 0.004543502807617187, 'time_algorithm_update': 0.011216317176818848, 'critic_loss': 3942529882856095.5, 'actor_loss': -208723668.128, 'time_step': 0.016014933347702025, 'td_error': 1669651373441116.8, 'value_scale': 196807010.3755239, 'discounted_advantage': -275857799.1786294, 'initial_state': 246206752.0, 'diff_eval': 113474.30846605197} step=164000
2025-12-06 01:19.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_164000.d3


Epoch 165/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.10it/s, critic_loss=4.31e+15, actor_loss=-2.17e+8]


2025-12-06 01:20.11 [info     ] TD3_20251206002429: epoch=165 step=165000 epoch=165 metrics={'time_sample_batch': 0.004775228023529053, 'time_algorithm_update': 0.011573955059051514, 'critic_loss': 4309306064450355.0, 'actor_loss': -217037448.992, 'time_step': 0.016609883308410643, 'td_error': 1807628225260892.0, 'value_scale': 204706682.45766973, 'discounted_advantage': -286715340.2864044, 'initial_state': 256255680.0, 'diff_eval': 113474.30846605197} step=165000
2025-12-06 01:20.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_165000.d3


Epoch 166/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.84it/s, critic_loss=4.62e+15, actor_loss=-2.25e+8]


2025-12-06 01:20.31 [info     ] TD3_20251206002429: epoch=166 step=166000 epoch=166 metrics={'time_sample_batch': 0.004592672109603881, 'time_algorithm_update': 0.01127568531036377, 'critic_loss': 4622993909524464.0, 'actor_loss': -225434810.656, 'time_step': 0.01612316083908081, 'td_error': 1951195739233095.5, 'value_scale': 212542612.19782063, 'discounted_advantage': -298654658.9526225, 'initial_state': 266236512.0, 'diff_eval': 113474.30846605197} step=166000
2025-12-06 01:20.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_166000.d3


Epoch 167/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.52it/s, critic_loss=5e+15, actor_loss=-2.34e+8]  


2025-12-06 01:20.50 [info     ] TD3_20251206002429: epoch=167 step=167000 epoch=167 metrics={'time_sample_batch': 0.004613625526428223, 'time_algorithm_update': 0.011384030103683471, 'critic_loss': 5006698694515360.0, 'actor_loss': -234332476.256, 'time_step': 0.01622810673713684, 'td_error': 2106749117212191.2, 'value_scale': 220769466.74601844, 'discounted_advantage': -310512797.1248793, 'initial_state': 276765440.0, 'diff_eval': 113474.30846605197} step=167000
2025-12-06 01:20.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_167000.d3


Epoch 168/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.70it/s, critic_loss=5.45e+15, actor_loss=-2.43e+8]


2025-12-06 01:21.10 [info     ] TD3_20251206002429: epoch=168 step=168000 epoch=168 metrics={'time_sample_batch': 0.004712609529495239, 'time_algorithm_update': 0.011477554559707642, 'critic_loss': 5452571196769960.0, 'actor_loss': -243116235.008, 'time_step': 0.0164475781917572, 'td_error': 2272376759462327.5, 'value_scale': 229084981.86085498, 'discounted_advantage': -322945118.8008346, 'initial_state': 287383520.0, 'diff_eval': 113474.30846605197} step=168000
2025-12-06 01:21.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_168000.d3


Epoch 169/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.08it/s, critic_loss=5.81e+15, actor_loss=-2.53e+8]


2025-12-06 01:21.31 [info     ] TD3_20251206002429: epoch=169 step=169000 epoch=169 metrics={'time_sample_batch': 0.004743518114089966, 'time_algorithm_update': 0.011929874658584595, 'critic_loss': 5814016661719613.0, 'actor_loss': -252534348.48, 'time_step': 0.016906688690185547, 'td_error': 2448073163330796.5, 'value_scale': 237778445.23386422, 'discounted_advantage': -334720435.7789972, 'initial_state': 298473856.0, 'diff_eval': 113474.30846605197} step=169000
2025-12-06 01:21.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_169000.d3


Epoch 170/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.88it/s, critic_loss=6.31e+15, actor_loss=-2.62e+8]


2025-12-06 01:21.50 [info     ] TD3_20251206002429: epoch=170 step=170000 epoch=170 metrics={'time_sample_batch': 0.004454013824462891, 'time_algorithm_update': 0.010936276912689209, 'critic_loss': 6320492531473711.0, 'actor_loss': -261831282.176, 'time_step': 0.015625118494033814, 'td_error': 2634497398490223.0, 'value_scale': 246718973.4048617, 'discounted_advantage': -346170716.56456274, 'initial_state': 309941376.0, 'diff_eval': 113474.30846605197} step=170000
2025-12-06 01:21.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_170000.d3


Epoch 171/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.17it/s, critic_loss=6.77e+15, actor_loss=-2.72e+8]


2025-12-06 01:22.10 [info     ] TD3_20251206002429: epoch=171 step=171000 epoch=171 metrics={'time_sample_batch': 0.004657891273498535, 'time_algorithm_update': 0.011431856632232666, 'critic_loss': 6765651844476699.0, 'actor_loss': -271773982.112, 'time_step': 0.01633548951148987, 'td_error': 2835798264844640.5, 'value_scale': 255656218.91366303, 'discounted_advantage': -360898820.22064143, 'initial_state': 321328672.0, 'diff_eval': 113474.30846605197} step=171000
2025-12-06 01:22.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_171000.d3


Epoch 172/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.97it/s, critic_loss=7.31e+15, actor_loss=-2.82e+8]


2025-12-06 01:22.30 [info     ] TD3_20251206002429: epoch=172 step=172000 epoch=172 metrics={'time_sample_batch': 0.004619506359100342, 'time_algorithm_update': 0.011236903190612792, 'critic_loss': 7307393781429436.0, 'actor_loss': -281912962.304, 'time_step': 0.0160958456993103, 'td_error': 3050721424889795.0, 'value_scale': 264997185.53562447, 'discounted_advantage': -375011064.9473507, 'initial_state': 333241888.0, 'diff_eval': 113474.30846605197} step=172000
2025-12-06 01:22.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_172000.d3


Epoch 173/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.61it/s, critic_loss=7.86e+15, actor_loss=-2.92e+8]


2025-12-06 01:22.50 [info     ] TD3_20251206002429: epoch=173 step=173000 epoch=173 metrics={'time_sample_batch': 0.0046046302318573, 'time_algorithm_update': 0.011335162878036499, 'critic_loss': 7851695251260441.0, 'actor_loss': -292330660.16, 'time_step': 0.016189186573028563, 'td_error': 3279918105435513.0, 'value_scale': 274943855.6546521, 'discounted_advantage': -387071572.9377266, 'initial_state': 346023456.0, 'diff_eval': 113474.30846605197} step=173000
2025-12-06 01:22.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_173000.d3


Epoch 174/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.11it/s, critic_loss=8.42e+15, actor_loss=-3.03e+8]


2025-12-06 01:23.09 [info     ] TD3_20251206002429: epoch=174 step=174000 epoch=174 metrics={'time_sample_batch': 0.00455959153175354, 'time_algorithm_update': 0.011268298864364625, 'critic_loss': 8420435426724020.0, 'actor_loss': -303043714.624, 'time_step': 0.01606912159919739, 'td_error': 3520236709211856.5, 'value_scale': 284753049.612741, 'discounted_advantage': -401334323.7812967, 'initial_state': 358641024.0, 'diff_eval': 113474.30846605197} step=174000
2025-12-06 01:23.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_174000.d3


Epoch 175/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.43it/s, critic_loss=9.04e+15, actor_loss=-3.13e+8]


2025-12-06 01:23.29 [info     ] TD3_20251206002429: epoch=175 step=175000 epoch=175 metrics={'time_sample_batch': 0.004540096521377563, 'time_algorithm_update': 0.011192317724227905, 'critic_loss': 9045687198099178.0, 'actor_loss': -313467504.96, 'time_step': 0.01597670125961304, 'td_error': 3781121871163255.0, 'value_scale': 294936187.41324395, 'discounted_advantage': -416057479.1916341, 'initial_state': 371629600.0, 'diff_eval': 113474.30846605197} step=175000
2025-12-06 01:23.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_175000.d3


Epoch 176/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.13it/s, critic_loss=9.72e+15, actor_loss=-3.25e+8]


2025-12-06 01:23.49 [info     ] TD3_20251206002429: epoch=176 step=176000 epoch=176 metrics={'time_sample_batch': 0.0047675566673278805, 'time_algorithm_update': 0.011594806909561156, 'critic_loss': 9726850950370426.0, 'actor_loss': -324753144.64, 'time_step': 0.016611451864242555, 'td_error': 4056481653602827.5, 'value_scale': 305493732.19111484, 'discounted_advantage': -430398630.11702275, 'initial_state': 385182912.0, 'diff_eval': 113474.30846605197} step=176000
2025-12-06 01:23.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_176000.d3


Epoch 177/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.58it/s, critic_loss=1.05e+16, actor_loss=-3.36e+8]


2025-12-06 01:24.10 [info     ] TD3_20251206002429: epoch=177 step=177000 epoch=177 metrics={'time_sample_batch': 0.004780611991882324, 'time_algorithm_update': 0.01172579550743103, 'critic_loss': 1.0484041952952058e+16, 'actor_loss': -336373702.976, 'time_step': 0.01675278973579407, 'td_error': 4355189315515674.5, 'value_scale': 316376455.52388936, 'discounted_advantage': -446627846.73213094, 'initial_state': 399200992.0, 'diff_eval': 113474.30846605197} step=177000
2025-12-06 01:24.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_177000.d3


Epoch 178/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.95it/s, critic_loss=1.12e+16, actor_loss=-3.48e+8]


2025-12-06 01:24.30 [info     ] TD3_20251206002429: epoch=178 step=178000 epoch=178 metrics={'time_sample_batch': 0.004713850259780884, 'time_algorithm_update': 0.011439063549041749, 'critic_loss': 1.1180869017160122e+16, 'actor_loss': -348076929.6, 'time_step': 0.016387932300567627, 'td_error': 4659141704924073.0, 'value_scale': 327166148.79798824, 'discounted_advantage': -461805928.61427826, 'initial_state': 412990272.0, 'diff_eval': 113474.30846605197} step=178000
2025-12-06 01:24.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_178000.d3


Epoch 179/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.31it/s, critic_loss=1.19e+16, actor_loss=-3.6e+8]


2025-12-06 01:24.49 [info     ] TD3_20251206002429: epoch=179 step=179000 epoch=179 metrics={'time_sample_batch': 0.004354679107666016, 'time_algorithm_update': 0.010925087451934814, 'critic_loss': 1.1912653703879852e+16, 'actor_loss': -360216580.16, 'time_step': 0.015515313863754272, 'td_error': 4985499994588460.0, 'value_scale': 338728743.1751886, 'discounted_advantage': -474777242.1731585, 'initial_state': 427799744.0, 'diff_eval': 113474.30846605197} step=179000
2025-12-06 01:24.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_179000.d3


Epoch 180/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.40it/s, critic_loss=1.29e+16, actor_loss=-3.73e+8]


2025-12-06 01:25.09 [info     ] TD3_20251206002429: epoch=180 step=180000 epoch=180 metrics={'time_sample_batch': 0.004528095483779907, 'time_algorithm_update': 0.011215715885162354, 'critic_loss': 1.2903331685008932e+16, 'actor_loss': -372564944.256, 'time_step': 0.015992169618606566, 'td_error': 5338692191319058.0, 'value_scale': 349717665.49874264, 'discounted_advantage': -496192943.71519685, 'initial_state': 441886240.0, 'diff_eval': 113474.30846605197} step=180000
2025-12-06 01:25.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_180000.d3


Epoch 181/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.22it/s, critic_loss=1.37e+16, actor_loss=-3.85e+8]


2025-12-06 01:25.30 [info     ] TD3_20251206002429: epoch=181 step=181000 epoch=181 metrics={'time_sample_batch': 0.0048618030548095706, 'time_algorithm_update': 0.011752965927124024, 'critic_loss': 1.3664501393895982e+16, 'actor_loss': -385125230.528, 'time_step': 0.016863157510757447, 'td_error': 5692432346986666.0, 'value_scale': 361704708.526404, 'discounted_advantage': -507492893.7058881, 'initial_state': 457340928.0, 'diff_eval': 113474.30846605197} step=181000
2025-12-06 01:25.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_181000.d3


Epoch 182/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.57it/s, critic_loss=1.46e+16, actor_loss=-3.98e+8]


2025-12-06 01:25.49 [info     ] TD3_20251206002429: epoch=182 step=182000 epoch=182 metrics={'time_sample_batch': 0.004629610061645507, 'time_algorithm_update': 0.011306375026702882, 'critic_loss': 1.4612614719576474e+16, 'actor_loss': -397783333.184, 'time_step': 0.016186405181884765, 'td_error': 6076334350714432.0, 'value_scale': 373643418.6957251, 'discounted_advantage': -523940006.95724523, 'initial_state': 472711168.0, 'diff_eval': 113474.30846605197} step=182000
2025-12-06 01:25.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_182000.d3


Epoch 183/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.65it/s, critic_loss=1.56e+16, actor_loss=-4.11e+8]


2025-12-06 01:26.10 [info     ] TD3_20251206002429: epoch=183 step=183000 epoch=183 metrics={'time_sample_batch': 0.0048026697635650635, 'time_algorithm_update': 0.011662428379058837, 'critic_loss': 1.5653235166149608e+16, 'actor_loss': -410878525.632, 'time_step': 0.01672784900665283, 'td_error': 6481694745741925.0, 'value_scale': 385753238.40737635, 'discounted_advantage': -542319720.3698857, 'initial_state': 488311552.0, 'diff_eval': 113474.30846605197} step=183000
2025-12-06 01:26.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_183000.d3


Epoch 184/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.72it/s, critic_loss=1.66e+16, actor_loss=-4.24e+8]


2025-12-06 01:26.30 [info     ] TD3_20251206002429: epoch=184 step=184000 epoch=184 metrics={'time_sample_batch': 0.004592638254165649, 'time_algorithm_update': 0.011354659080505371, 'critic_loss': 1.6643163281422286e+16, 'actor_loss': -424409845.376, 'time_step': 0.016180956840515136, 'td_error': 6917642378585422.0, 'value_scale': 398489343.69823974, 'discounted_advantage': -559524060.2077134, 'initial_state': 504646240.0, 'diff_eval': 113474.30846605197} step=184000
2025-12-06 01:26.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_184000.d3


Epoch 185/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.48it/s, critic_loss=1.78e+16, actor_loss=-4.38e+8]


2025-12-06 01:26.50 [info     ] TD3_20251206002429: epoch=185 step=185000 epoch=185 metrics={'time_sample_batch': 0.004630279779434204, 'time_algorithm_update': 0.011356847763061523, 'critic_loss': 1.7852856320877658e+16, 'actor_loss': -438068669.312, 'time_step': 0.016230500221252442, 'td_error': 7378257341722405.0, 'value_scale': 411419946.3470243, 'discounted_advantage': -577790252.4178731, 'initial_state': 521270848.0, 'diff_eval': 113474.30846605197} step=185000
2025-12-06 01:26.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_185000.d3


Epoch 186/200: 100%|██████████| 1000/1000 [00:16<00:00, 58.97it/s, critic_loss=1.88e+16, actor_loss=-4.52e+8]


2025-12-06 01:27.10 [info     ] TD3_20251206002429: epoch=186 step=186000 epoch=186 metrics={'time_sample_batch': 0.004764917135238647, 'time_algorithm_update': 0.011647881746292114, 'critic_loss': 1.879918222558783e+16, 'actor_loss': -452245830.976, 'time_step': 0.01666043472290039, 'td_error': 7852468128825993.0, 'value_scale': 424781185.5289187, 'discounted_advantage': -592531195.7383213, 'initial_state': 538412928.0, 'diff_eval': 113474.30846605197} step=186000
2025-12-06 01:27.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_186000.d3


Epoch 187/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.73it/s, critic_loss=2.03e+16, actor_loss=-4.67e+8]


2025-12-06 01:27.30 [info     ] TD3_20251206002429: epoch=187 step=187000 epoch=187 metrics={'time_sample_batch': 0.004644855737686158, 'time_algorithm_update': 0.011290921449661255, 'critic_loss': 2.032011286617155e+16, 'actor_loss': -466643540.672, 'time_step': 0.016177133798599242, 'td_error': 8369849904436726.0, 'value_scale': 438262036.48616934, 'discounted_advantage': -613089965.71098, 'initial_state': 555723264.0, 'diff_eval': 113474.30846605197} step=187000
2025-12-06 01:27.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_187000.d3


Epoch 188/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.72it/s, critic_loss=2.15e+16, actor_loss=-4.81e+8]


2025-12-06 01:27.50 [info     ] TD3_20251206002429: epoch=188 step=188000 epoch=188 metrics={'time_sample_batch': 0.004614382982254028, 'time_algorithm_update': 0.011320312976837159, 'critic_loss': 2.1524966769195744e+16, 'actor_loss': -481388062.464, 'time_step': 0.016179991006851198, 'td_error': 8905836215511454.0, 'value_scale': 451899861.6865046, 'discounted_advantage': -633087777.8109264, 'initial_state': 573358080.0, 'diff_eval': 113474.30846605197} step=188000
2025-12-06 01:27.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_188000.d3


Epoch 189/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.16it/s, critic_loss=2.3e+16, actor_loss=-4.96e+8]


2025-12-06 01:28.10 [info     ] TD3_20251206002429: epoch=189 step=189000 epoch=189 metrics={'time_sample_batch': 0.0046435270309448246, 'time_algorithm_update': 0.011387168169021607, 'critic_loss': 2.2970144172476988e+16, 'actor_loss': -496269234.176, 'time_step': 0.016293479204177855, 'td_error': 9480489488859742.0, 'value_scale': 466107582.24308467, 'discounted_advantage': -652961799.5230172, 'initial_state': 591619264.0, 'diff_eval': 113474.30846605197} step=189000
2025-12-06 01:28.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_189000.d3


Epoch 190/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.12it/s, critic_loss=2.43e+16, actor_loss=-5.12e+8]


2025-12-06 01:28.30 [info     ] TD3_20251206002429: epoch=190 step=190000 epoch=190 metrics={'time_sample_batch': 0.004671316862106323, 'time_algorithm_update': 0.011423358678817748, 'critic_loss': 2.4293299820785828e+16, 'actor_loss': -511813942.912, 'time_step': 0.016333914041519165, 'td_error': 1.0070910827399496e+16, 'value_scale': 480638888.74434197, 'discounted_advantage': -669570939.3932931, 'initial_state': 610369536.0, 'diff_eval': 113474.30846605197} step=190000
2025-12-06 01:28.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_190000.d3


Epoch 191/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.48it/s, critic_loss=2.62e+16, actor_loss=-5.27e+8]


2025-12-06 01:28.50 [info     ] TD3_20251206002429: epoch=191 step=191000 epoch=191 metrics={'time_sample_batch': 0.004587225675582886, 'time_algorithm_update': 0.011406617641448975, 'critic_loss': 2.6185808909516668e+16, 'actor_loss': -527484905.664, 'time_step': 0.01622962260246277, 'td_error': 1.071832804270186e+16, 'value_scale': 494926322.1525566, 'discounted_advantage': -697277174.190314, 'initial_state': 628791808.0, 'diff_eval': 113474.30846605197} step=191000
2025-12-06 01:28.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_191000.d3


Epoch 192/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.24it/s, critic_loss=2.75e+16, actor_loss=-5.43e+8]


2025-12-06 01:29.10 [info     ] TD3_20251206002429: epoch=192 step=192000 epoch=192 metrics={'time_sample_batch': 0.004630785465240478, 'time_algorithm_update': 0.011426495552062989, 'critic_loss': 2.7496034050901016e+16, 'actor_loss': -543538244.416, 'time_step': 0.016291629552841188, 'td_error': 1.1378269872463084e+16, 'value_scale': 510341133.66638726, 'discounted_advantage': -714408399.8561637, 'initial_state': 648616064.0, 'diff_eval': 113474.30846605197} step=192000
2025-12-06 01:29.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_192000.d3


Epoch 193/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.69it/s, critic_loss=2.93e+16, actor_loss=-5.6e+8]


2025-12-06 01:29.30 [info     ] TD3_20251206002429: epoch=193 step=193000 epoch=193 metrics={'time_sample_batch': 0.004762395858764649, 'time_algorithm_update': 0.011679824590682984, 'critic_loss': 2.928217914320355e+16, 'actor_loss': -560142536.192, 'time_step': 0.01670617127418518, 'td_error': 1.2085265181402912e+16, 'value_scale': 526125032.21458507, 'discounted_advantage': -734308680.4354511, 'initial_state': 669033984.0, 'diff_eval': 113474.30846605197} step=193000
2025-12-06 01:29.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_193000.d3


Epoch 194/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.64it/s, critic_loss=3.13e+16, actor_loss=-5.77e+8]


2025-12-06 01:29.50 [info     ] TD3_20251206002429: epoch=194 step=194000 epoch=194 metrics={'time_sample_batch': 0.004467209577560425, 'time_algorithm_update': 0.011185450553894043, 'critic_loss': 3.1325743137972812e+16, 'actor_loss': -577038387.584, 'time_step': 0.015906969547271728, 'td_error': 1.2822878589345792e+16, 'value_scale': 541153479.8658843, 'discounted_advantage': -761173301.3411127, 'initial_state': 688275200.0, 'diff_eval': 113474.30846605197} step=194000
2025-12-06 01:29.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_194000.d3


Epoch 195/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.33it/s, critic_loss=3.27e+16, actor_loss=-5.94e+8]


2025-12-06 01:30.10 [info     ] TD3_20251206002429: epoch=195 step=195000 epoch=195 metrics={'time_sample_batch': 0.00478737211227417, 'time_algorithm_update': 0.01149775218963623, 'critic_loss': 3.2755135162051724e+16, 'actor_loss': -594050802.816, 'time_step': 0.01654532527923584, 'td_error': 1.3567738568510416e+16, 'value_scale': 557277786.4878458, 'discounted_advantage': -776921864.1616552, 'initial_state': 709001984.0, 'diff_eval': 113474.30846605197} step=195000
2025-12-06 01:30.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_195000.d3


Epoch 196/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.70it/s, critic_loss=3.47e+16, actor_loss=-6.11e+8]


2025-12-06 01:30.30 [info     ] TD3_20251206002429: epoch=196 step=196000 epoch=196 metrics={'time_sample_batch': 0.00469860053062439, 'time_algorithm_update': 0.011507117033004761, 'critic_loss': 3.472051508399309e+16, 'actor_loss': -611514105.856, 'time_step': 0.01645363426208496, 'td_error': 1.436932960766853e+16, 'value_scale': 573629684.9958088, 'discounted_advantage': -797851197.6315312, 'initial_state': 730241280.0, 'diff_eval': 113474.30846605197} step=196000
2025-12-06 01:30.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_196000.d3


Epoch 197/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.77it/s, critic_loss=3.72e+16, actor_loss=-6.29e+8]


2025-12-06 01:30.50 [info     ] TD3_20251206002429: epoch=197 step=197000 epoch=197 metrics={'time_sample_batch': 0.004510182142257691, 'time_algorithm_update': 0.011152456045150756, 'critic_loss': 3.7160598805724264e+16, 'actor_loss': -628727924.992, 'time_step': 0.015901422262191774, 'td_error': 1.5215724217330812e+16, 'value_scale': 589382485.5054485, 'discounted_advantage': -827266700.2527028, 'initial_state': 750483264.0, 'diff_eval': 113474.30846605197} step=197000
2025-12-06 01:30.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_197000.d3


Epoch 198/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.18it/s, critic_loss=3.91e+16, actor_loss=-6.47e+8]


2025-12-06 01:31.10 [info     ] TD3_20251206002429: epoch=198 step=198000 epoch=198 metrics={'time_sample_batch': 0.004582010269165039, 'time_algorithm_update': 0.011228760719299317, 'critic_loss': 3.908675044737286e+16, 'actor_loss': -646819589.376, 'time_step': 0.016055619716644286, 'td_error': 1.6108637482031926e+16, 'value_scale': 606796235.6345348, 'discounted_advantage': -847331179.6277472, 'initial_state': 772915712.0, 'diff_eval': 113474.30846605197} step=198000
2025-12-06 01:31.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_198000.d3


Epoch 199/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.92it/s, critic_loss=4.15e+16, actor_loss=-6.65e+8]


2025-12-06 01:31.30 [info     ] TD3_20251206002429: epoch=199 step=199000 epoch=199 metrics={'time_sample_batch': 0.004622738599777221, 'time_algorithm_update': 0.011517744779586793, 'critic_loss': 4.1561619699792344e+16, 'actor_loss': -665534818.688, 'time_step': 0.016389329433441163, 'td_error': 1.7046067527193678e+16, 'value_scale': 623566804.6404023, 'discounted_advantage': -875139459.0964081, 'initial_state': 794526016.0, 'diff_eval': 113474.30846605197} step=199000
2025-12-06 01:31.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_199000.d3


Epoch 200/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.21it/s, critic_loss=4.37e+16, actor_loss=-6.84e+8]


2025-12-06 01:31.50 [info     ] TD3_20251206002429: epoch=200 step=200000 epoch=200 metrics={'time_sample_batch': 0.004583210706710816, 'time_algorithm_update': 0.011489551067352295, 'critic_loss': 4.3690398546174936e+16, 'actor_loss': -684081463.936, 'time_step': 0.016319865942001344, 'td_error': 1.802865625512189e+16, 'value_scale': 641321198.3906119, 'discounted_advantage': -898446243.5132151, 'initial_state': 817602176.0, 'diff_eval': 113474.30846605197} step=200000
2025-12-06 01:31.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3_20251206002429\model_200000.d3
Training model:  SAC
2025-12-06 01:31.50 [info     ] dataset info                   dataset_info=DatasetInfo(observation_signature=Signature(dtype=[dtype('float64')], shape=[(7,)]), action_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.CONTINUOUS: 1>, action_size=1)
2025-12-06 01:31.50 [debug    ] Buil

Epoch 1/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.88it/s, critic_loss=1.22, actor_loss=-3.88, temp=0.871, temp_loss=1.43]


2025-12-06 01:32.19 [info     ] SAC_20251206013150: epoch=1 step=1000 epoch=1 metrics={'time_sample_batch': 0.0048367600440979005, 'time_algorithm_update': 0.020141843557357788, 'critic_loss': 1.229212403535843, 'actor_loss': -3.902109728574753, 'temp': 0.8695658468604088, 'temp_loss': 1.4296396962404252, 'time_step': 0.025250763416290282, 'td_error': 1.8953480247537788, 'value_scale': 5.827216723994707, 'discounted_advantage': -7.156195411037378, 'initial_state': 5.665152549743652, 'diff_eval': 16509.493211723497} step=1000
2025-12-06 01:32.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_1000.d3


Epoch 2/200: 100%|██████████| 1000/1000 [00:26<00:00, 37.79it/s, critic_loss=3.12, actor_loss=-7.89, temp=0.669, temp_loss=1.05]


2025-12-06 01:32.49 [info     ] SAC_20251206013150: epoch=2 step=2000 epoch=2 metrics={'time_sample_batch': 0.005135216474533081, 'time_algorithm_update': 0.020540284156799316, 'critic_loss': 3.1211577212810515, 'actor_loss': -7.907376539230347, 'temp': 0.6684781237840652, 'temp_loss': 1.0517234746217727, 'time_step': 0.02595059657096863, 'td_error': 2.468205571526976, 'value_scale': 9.670111001387227, 'discounted_advantage': -11.559722409670282, 'initial_state': 9.12932014465332, 'diff_eval': 33098.70133925586} step=2000
2025-12-06 01:32.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_2000.d3


Epoch 3/200: 100%|██████████| 1000/1000 [00:26<00:00, 38.30it/s, critic_loss=4.9, actor_loss=-12.2, temp=0.528, temp_loss=0.738]


2025-12-06 01:33.19 [info     ] SAC_20251206013150: epoch=3 step=3000 epoch=3 metrics={'time_sample_batch': 0.0049783034324646, 'time_algorithm_update': 0.020343228340148924, 'critic_loss': 4.92135335278511, 'actor_loss': -12.256187130928039, 'temp': 0.5278766998052598, 'temp_loss': 0.7369062793254852, 'time_step': 0.025606778860092164, 'td_error': 7.786291380970313, 'value_scale': 16.314074590802193, 'discounted_advantage': -26.57809745362728, 'initial_state': 11.768375396728516, 'diff_eval': 46560.79938872394} step=3000
2025-12-06 01:33.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_3000.d3


Epoch 4/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.98it/s, critic_loss=8.01, actor_loss=-18.4, temp=0.429, temp_loss=0.509]


2025-12-06 01:33.48 [info     ] SAC_20251206013150: epoch=4 step=4000 epoch=4 metrics={'time_sample_batch': 0.004792679309844971, 'time_algorithm_update': 0.0201111855506897, 'critic_loss': 8.02127885723114, 'actor_loss': -18.46453967857361, 'temp': 0.4290912467837334, 'temp_loss': 0.5077485368549823, 'time_step': 0.025197394609451293, 'td_error': 17.053787918364215, 'value_scale': 25.14172123164432, 'discounted_advantage': -45.731145647165505, 'initial_state': 16.14269256591797, 'diff_eval': 64066.59601912861} step=4000
2025-12-06 01:33.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_4000.d3


Epoch 5/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.57it/s, critic_loss=12, actor_loss=-27.6, temp=0.357, temp_loss=0.309] 


2025-12-06 01:34.17 [info     ] SAC_20251206013150: epoch=5 step=5000 epoch=5 metrics={'time_sample_batch': 0.004931015729904175, 'time_algorithm_update': 0.02025347399711609, 'critic_loss': 11.98265062379837, 'actor_loss': -27.637532516479492, 'temp': 0.35670826548337936, 'temp_loss': 0.30788870905339716, 'time_step': 0.025446053504943847, 'td_error': 28.00560204930751, 'value_scale': 37.402488417617434, 'discounted_advantage': -62.433230074960534, 'initial_state': 23.596298217773438, 'diff_eval': 73868.75783284842} step=5000
2025-12-06 01:34.17 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_5000.d3


Epoch 6/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.24it/s, critic_loss=18.1, actor_loss=-40.2, temp=0.311, temp_loss=0.138]


2025-12-06 01:34.46 [info     ] SAC_20251206013150: epoch=6 step=6000 epoch=6 metrics={'time_sample_batch': 0.004825887203216553, 'time_algorithm_update': 0.019972961902618408, 'critic_loss': 18.119776896476747, 'actor_loss': -40.30173378372192, 'temp': 0.3104142154157162, 'temp_loss': 0.1370246925903484, 'time_step': 0.02505897259712219, 'td_error': 43.33479565822202, 'value_scale': 53.30669419303824, 'discounted_advantage': -84.97976511600814, 'initial_state': 34.740962982177734, 'diff_eval': 85257.11568855897} step=6000
2025-12-06 01:34.46 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_6000.d3


Epoch 7/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.10it/s, critic_loss=30, actor_loss=-58, temp=0.298, temp_loss=-0.033]      


2025-12-06 01:35.15 [info     ] SAC_20251206013150: epoch=7 step=7000 epoch=7 metrics={'time_sample_batch': 0.004786853313446045, 'time_algorithm_update': 0.02008119511604309, 'critic_loss': 30.058710487365722, 'actor_loss': -58.09853416442871, 'temp': 0.29805664712190627, 'temp_loss': -0.03339947238191962, 'time_step': 0.02512750196456909, 'td_error': 96.54595840076381, 'value_scale': 75.6480249547199, 'discounted_advantage': -132.96270721372727, 'initial_state': 48.326873779296875, 'diff_eval': 86453.11926517448} step=7000
2025-12-06 01:35.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_7000.d3


Epoch 8/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.94it/s, critic_loss=59.5, actor_loss=-84.4, temp=0.348, temp_loss=-0.151]


2025-12-06 01:35.44 [info     ] SAC_20251206013150: epoch=8 step=8000 epoch=8 metrics={'time_sample_batch': 0.004818587064743042, 'time_algorithm_update': 0.0201475248336792, 'critic_loss': 59.72707432937622, 'actor_loss': -84.58275521850587, 'temp': 0.3488932719230652, 'temp_loss': -0.1511919639511034, 'time_step': 0.02523995637893677, 'td_error': 210.3344140612007, 'value_scale': 112.00166542711922, 'discounted_advantage': -184.1341359070552, 'initial_state': 70.23230743408203, 'diff_eval': 96039.4135079848} step=8000
2025-12-06 01:35.45 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_8000.d3


Epoch 9/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.60it/s, critic_loss=144, actor_loss=-126, temp=0.486, temp_loss=-0.282]


2025-12-06 01:36.14 [info     ] SAC_20251206013150: epoch=9 step=9000 epoch=9 metrics={'time_sample_batch': 0.004935290813446045, 'time_algorithm_update': 0.020233221769332885, 'critic_loss': 144.57308465194703, 'actor_loss': -125.74435611724853, 'temp': 0.4870486578643322, 'temp_loss': -0.2832071069255471, 'time_step': 0.025433808088302614, 'td_error': 469.92839383115336, 'value_scale': 166.08327032314662, 'discounted_advantage': -265.42528029683723, 'initial_state': 104.52493286132812, 'diff_eval': 99996.43673104687} step=9000
2025-12-06 01:36.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_9000.d3


Epoch 10/200: 100%|██████████| 1000/1000 [00:26<00:00, 37.16it/s, critic_loss=365, actor_loss=-188, temp=0.765, temp_loss=-0.702]


2025-12-06 01:36.45 [info     ] SAC_20251206013150: epoch=10 step=10000 epoch=10 metrics={'time_sample_batch': 0.005363574981689453, 'time_algorithm_update': 0.020800294160842895, 'critic_loss': 367.3594665527344, 'actor_loss': -188.83523118591307, 'temp': 0.7668421034812927, 'temp_loss': -0.7044534058868885, 'time_step': 0.026435930728912354, 'td_error': 1029.3580263285646, 'value_scale': 243.58004319477962, 'discounted_advantage': -405.40830686900506, 'initial_state': 162.60948181152344, 'diff_eval': 108843.99597093146} step=10000
2025-12-06 01:36.45 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_10000.d3


Epoch 11/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.19it/s, critic_loss=790, actor_loss=-274, temp=1.21, temp_loss=-1.26] 


2025-12-06 01:37.14 [info     ] SAC_20251206013150: epoch=11 step=11000 epoch=11 metrics={'time_sample_batch': 0.004675035953521729, 'time_algorithm_update': 0.020177752017974854, 'critic_loss': 790.912283279419, 'actor_loss': -274.8760278625488, 'temp': 1.2103033369779588, 'temp_loss': -1.2638941954374314, 'time_step': 0.025090446710586547, 'td_error': 1671.5201904471442, 'value_scale': 340.81744794693697, 'discounted_advantage': -530.6711040568374, 'initial_state': 258.9853210449219, 'diff_eval': 108703.23735061735} step=11000
2025-12-06 01:37.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_11000.d3


Epoch 12/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.11it/s, critic_loss=1.25e+3, actor_loss=-365, temp=1.8, temp_loss=-1.59]


2025-12-06 01:37.43 [info     ] SAC_20251206013150: epoch=12 step=12000 epoch=12 metrics={'time_sample_batch': 0.004852382659912109, 'time_algorithm_update': 0.019994240522384642, 'critic_loss': 1249.426383972168, 'actor_loss': -365.9105528564453, 'temp': 1.8009487673044204, 'temp_loss': -1.5906401939988137, 'time_step': 0.02511239743232727, 'td_error': 2464.7276456117606, 'value_scale': 435.4486433670287, 'discounted_advantage': -682.2538087851226, 'initial_state': 337.93585205078125, 'diff_eval': 108198.90238995485} step=12000
2025-12-06 01:37.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_12000.d3


Epoch 13/200: 100%|██████████| 1000/1000 [00:27<00:00, 36.79it/s, critic_loss=1.69e+3, actor_loss=-457, temp=2.51, temp_loss=-1.57]


2025-12-06 01:38.13 [info     ] SAC_20251206013150: epoch=13 step=13000 epoch=13 metrics={'time_sample_batch': 0.004801419019699097, 'time_algorithm_update': 0.021634170293807983, 'critic_loss': 1692.299114807129, 'actor_loss': -457.9096721191406, 'temp': 2.5172832067012787, 'temp_loss': -1.565187043517828, 'time_step': 0.026707138061523437, 'td_error': 3079.5013173979537, 'value_scale': 529.8695772719403, 'discounted_advantage': -819.0163238488083, 'initial_state': 411.53448486328125, 'diff_eval': 105239.01653843693} step=13000
2025-12-06 01:38.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_13000.d3


Epoch 14/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.29it/s, critic_loss=2.19e+3, actor_loss=-558, temp=3.36, temp_loss=-1.43]


2025-12-06 01:38.42 [info     ] SAC_20251206013150: epoch=14 step=14000 epoch=14 metrics={'time_sample_batch': 0.004885010004043579, 'time_algorithm_update': 0.019900441884994506, 'critic_loss': 2192.8280278930665, 'actor_loss': -558.6371146850586, 'temp': 3.3679201729297636, 'temp_loss': -1.429961893476546, 'time_step': 0.025023304224014283, 'td_error': 4404.415709099485, 'value_scale': 646.7835356335828, 'discounted_advantage': -989.5481264437124, 'initial_state': 501.919677734375, 'diff_eval': 105217.73824338302} step=14000
2025-12-06 01:38.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_14000.d3


Epoch 15/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.92it/s, critic_loss=3.06e+3, actor_loss=-683, temp=4.48, temp_loss=-1.58]


2025-12-06 01:39.12 [info     ] SAC_20251206013150: epoch=15 step=15000 epoch=15 metrics={'time_sample_batch': 0.0048310067653656, 'time_algorithm_update': 0.02016126585006714, 'critic_loss': 3064.9140086669922, 'actor_loss': -683.1998474121094, 'temp': 4.488780357122422, 'temp_loss': -1.5842859177663922, 'time_step': 0.025255244731903076, 'td_error': 6290.753032481281, 'value_scale': 794.7764229274856, 'discounted_advantage': -1137.4791375123173, 'initial_state': 620.017822265625, 'diff_eval': 105056.38567289343} step=15000
2025-12-06 01:39.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_15000.d3


Epoch 16/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.44it/s, critic_loss=4.84e+3, actor_loss=-842, temp=5.96, temp_loss=-1.43]


2025-12-06 01:39.40 [info     ] SAC_20251206013150: epoch=16 step=16000 epoch=16 metrics={'time_sample_batch': 0.004747400522232056, 'time_algorithm_update': 0.01993791341781616, 'critic_loss': 4850.160753051758, 'actor_loss': -842.4949140625, 'temp': 5.9692807970047, 'temp_loss': -1.4303198371045291, 'time_step': 0.024932355165481566, 'td_error': 11267.363934075862, 'value_scale': 983.3005545808803, 'discounted_advantage': -1554.6395690826191, 'initial_state': 755.8167114257812, 'diff_eval': 104712.0669848707} step=16000
2025-12-06 01:39.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_16000.d3


Epoch 17/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.54it/s, critic_loss=9e+3, actor_loss=-1.06e+3, temp=8.13, temp_loss=-2.26]  


2025-12-06 01:40.10 [info     ] SAC_20251206013150: epoch=17 step=17000 epoch=17 metrics={'time_sample_batch': 0.00488625693321228, 'time_algorithm_update': 0.020333367109298707, 'critic_loss': 9026.150587646483, 'actor_loss': -1057.5886260986329, 'temp': 8.138146691799165, 'temp_loss': -2.2600093475729226, 'time_step': 0.02549653959274292, 'td_error': 20104.827264099888, 'value_scale': 1260.950770055958, 'discounted_advantage': -1920.3626596075503, 'initial_state': 967.5750122070312, 'diff_eval': 106518.21796260556} step=17000
2025-12-06 01:40.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_17000.d3


Epoch 18/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.06it/s, critic_loss=1.7e+4, actor_loss=-1.34e+3, temp=11.1, temp_loss=-2.43]


2025-12-06 01:40.39 [info     ] SAC_20251206013150: epoch=18 step=18000 epoch=18 metrics={'time_sample_batch': 0.004805486917495727, 'time_algorithm_update': 0.019998645544052125, 'critic_loss': 17114.672699707033, 'actor_loss': -1344.3964323730468, 'temp': 11.120293254852294, 'temp_loss': -2.4340120133161545, 'time_step': 0.025091261625289917, 'td_error': 38606.001819571204, 'value_scale': 1606.2850120781054, 'discounted_advantage': -2652.0746261773406, 'initial_state': 1227.6925048828125, 'diff_eval': 105609.84817135244} step=18000
2025-12-06 01:40.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_18000.d3


Epoch 19/200: 100%|██████████| 1000/1000 [00:26<00:00, 37.52it/s, critic_loss=3.46e+4, actor_loss=-1.74e+3, temp=15.8, temp_loss=-4.41]


2025-12-06 01:41.09 [info     ] SAC_20251206013150: epoch=19 step=19000 epoch=19 metrics={'time_sample_batch': 0.005105670928955078, 'time_algorithm_update': 0.02080297255516052, 'critic_loss': 34643.40965429688, 'actor_loss': -1739.794030883789, 'temp': 15.845647809028625, 'temp_loss': -4.415510757133364, 'time_step': 0.02617642664909363, 'td_error': 63402.28257149043, 'value_scale': 2091.7736183543816, 'discounted_advantage': -3102.2690797129976, 'initial_state': 1625.448486328125, 'diff_eval': 105043.75104356432} step=19000
2025-12-06 01:41.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_19000.d3


Epoch 20/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.25it/s, critic_loss=7.03e+4, actor_loss=-2.28e+3, temp=23.3, temp_loss=-7.73]


2025-12-06 01:41.38 [info     ] SAC_20251206013150: epoch=20 step=20000 epoch=20 metrics={'time_sample_batch': 0.004792802333831787, 'time_algorithm_update': 0.019992711782455444, 'critic_loss': 70676.80403710938, 'actor_loss': -2288.0572629394533, 'temp': 23.37620253753662, 'temp_loss': -7.7657846503257755, 'time_step': 0.025031471967697144, 'td_error': 130676.7420780185, 'value_scale': 2729.804878126146, 'discounted_advantage': -4518.5323879985, 'initial_state': 2171.884033203125, 'diff_eval': 105379.39889809671} step=20000
2025-12-06 01:41.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_20000.d3


Epoch 21/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.67it/s, critic_loss=1.58e+5, actor_loss=-3.11e+3, temp=36, temp_loss=-16]   


2025-12-06 01:42.08 [info     ] SAC_20251206013150: epoch=21 step=21000 epoch=21 metrics={'time_sample_batch': 0.004922795057296753, 'time_algorithm_update': 0.020193710803985597, 'critic_loss': 158439.0803203125, 'actor_loss': -3116.604533203125, 'temp': 36.09258376502991, 'temp_loss': -16.083097998142243, 'time_step': 0.02538030743598938, 'td_error': 288778.1252136486, 'value_scale': 3809.3301775821196, 'discounted_advantage': -5851.681574994816, 'initial_state': 3164.0263671875, 'diff_eval': 105967.16359528112} step=21000
2025-12-06 01:42.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_21000.d3


Epoch 22/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.11it/s, critic_loss=4.14e+5, actor_loss=-4.46e+3, temp=55.9, temp_loss=-29.1]


2025-12-06 01:42.37 [info     ] SAC_20251206013150: epoch=22 step=22000 epoch=22 metrics={'time_sample_batch': 0.0048492617607116695, 'time_algorithm_update': 0.019973968982696533, 'critic_loss': 416423.998140625, 'actor_loss': -4465.356713623047, 'temp': 56.01946653366089, 'temp_loss': -29.154239298820496, 'time_step': 0.025100393533706664, 'td_error': 755126.8077837679, 'value_scale': 5498.093642357109, 'discounted_advantage': -9566.023243456199, 'initial_state': 4716.92626953125, 'diff_eval': 107717.41299194182} step=22000
2025-12-06 01:42.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_22000.d3


Epoch 23/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.68it/s, critic_loss=1.14e+6, actor_loss=-6.58e+3, temp=84.9, temp_loss=-46.4]


2025-12-06 01:43.06 [info     ] SAC_20251206013150: epoch=23 step=23000 epoch=23 metrics={'time_sample_batch': 0.004891629934310913, 'time_algorithm_update': 0.020244841814041136, 'critic_loss': 1144599.5988125, 'actor_loss': -6596.947680664062, 'temp': 85.09885269927979, 'temp_loss': -46.49027120494843, 'time_step': 0.02539845824241638, 'td_error': 1894402.9068815322, 'value_scale': 8192.721924237414, 'discounted_advantage': -14052.201284391722, 'initial_state': 7265.59521484375, 'diff_eval': 107494.33987749073} step=23000
2025-12-06 01:43.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_23000.d3


Epoch 24/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.29it/s, critic_loss=3.06e+6, actor_loss=-9.74e+3, temp=126, temp_loss=-67.7]


2025-12-06 01:43.35 [info     ] SAC_20251206013150: epoch=24 step=24000 epoch=24 metrics={'time_sample_batch': 0.00479472017288208, 'time_algorithm_update': 0.01994873118400574, 'critic_loss': 3075172.621125, 'actor_loss': -9753.290572753906, 'temp': 126.69141410064697, 'temp_loss': -67.85574369812012, 'time_step': 0.025006113290786742, 'td_error': 4458089.850574886, 'value_scale': 11951.029799890639, 'discounted_advantage': -21040.736243930067, 'initial_state': 10814.5458984375, 'diff_eval': 107076.02048183547} step=24000
2025-12-06 01:43.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_24000.d3


Epoch 25/200: 100%|██████████| 1000/1000 [00:26<00:00, 38.15it/s, critic_loss=7.78e+6, actor_loss=-1.43e+4, temp=186, temp_loss=-97.2]


2025-12-06 01:44.05 [info     ] SAC_20251206013150: epoch=25 step=25000 epoch=25 metrics={'time_sample_batch': 0.0049399354457855225, 'time_algorithm_update': 0.020514050245285034, 'critic_loss': 7816825.3, 'actor_loss': -14297.906473632813, 'temp': 186.7556806793213, 'temp_loss': -97.31106070709228, 'time_step': 0.02572093963623047, 'td_error': 10637859.520241775, 'value_scale': 17498.101845727684, 'discounted_advantage': -31003.111600566117, 'initial_state': 16122.7451171875, 'diff_eval': 107794.91447693025} step=25000
2025-12-06 01:44.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_25000.d3


Epoch 26/200: 100%|██████████| 1000/1000 [00:26<00:00, 38.31it/s, critic_loss=2.03e+7, actor_loss=-2.15e+4, temp=277, temp_loss=-150]


2025-12-06 01:44.35 [info     ] SAC_20251206013150: epoch=26 step=26000 epoch=26 metrics={'time_sample_batch': 0.005050971508026123, 'time_algorithm_update': 0.020328731536865235, 'critic_loss': 20332495.062, 'actor_loss': -21555.97006640625, 'temp': 277.5028152008057, 'temp_loss': -150.4862823944092, 'time_step': 0.025638917446136476, 'td_error': 26244220.125820216, 'value_scale': 26566.67280326645, 'discounted_advantage': -46640.484579860895, 'initial_state': 25082.08984375, 'diff_eval': 107583.08017018615} step=26000
2025-12-06 01:44.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_26000.d3


Epoch 27/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.31it/s, critic_loss=5.06e+7, actor_loss=-3.18e+4, temp=403, temp_loss=-185]


2025-12-06 01:45.03 [info     ] SAC_20251206013150: epoch=27 step=27000 epoch=27 metrics={'time_sample_batch': 0.004802460193634033, 'time_algorithm_update': 0.019943644523620605, 'critic_loss': 50821044.968, 'actor_loss': -31821.94589453125, 'temp': 404.13222982788085, 'temp_loss': -185.6858390045166, 'time_step': 0.025003508806228637, 'td_error': 53382619.766081765, 'value_scale': 37561.054109584555, 'discounted_advantage': -64849.20430337764, 'initial_state': 35893.00390625, 'diff_eval': 106750.82457064383} step=27000
2025-12-06 01:45.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_27000.d3


Epoch 28/200: 100%|██████████| 1000/1000 [00:26<00:00, 37.90it/s, critic_loss=1.09e+8, actor_loss=-4.45e+4, temp=566, temp_loss=-201]


2025-12-06 01:45.33 [info     ] SAC_20251206013150: epoch=28 step=28000 epoch=28 metrics={'time_sample_batch': 0.005023334503173828, 'time_algorithm_update': 0.020583666801452637, 'critic_loss': 109277572.42, 'actor_loss': -44538.86753515625, 'temp': 566.9252476196289, 'temp_loss': -201.41001854705812, 'time_step': 0.02589205241203308, 'td_error': 99064431.55620842, 'value_scale': 51084.46081307628, 'discounted_advantage': -85704.30168079044, 'initial_state': 49369.62890625, 'diff_eval': 106552.55929556796} step=28000
2025-12-06 01:45.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_28000.d3


Epoch 29/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.54it/s, critic_loss=2.09e+8, actor_loss=-5.95e+4, temp=762, temp_loss=-174]


2025-12-06 01:46.04 [info     ] SAC_20251206013150: epoch=29 step=29000 epoch=29 metrics={'time_sample_batch': 0.004751421213150024, 'time_algorithm_update': 0.01976454973220825, 'critic_loss': 209136822.52, 'actor_loss': -59550.6416171875, 'temp': 762.5074165039063, 'temp_loss': -174.00107199907302, 'time_step': 0.02480468773841858, 'td_error': 165820905.63272735, 'value_scale': 66110.37634410363, 'discounted_advantage': -110339.11078134678, 'initial_state': 64245.62109375, 'diff_eval': 104159.29257536054} step=29000
2025-12-06 01:46.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_29000.d3


Epoch 30/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.65it/s, critic_loss=3.59e+8, actor_loss=-7.64e+4, temp=966, temp_loss=-129]


2025-12-06 01:46.33 [info     ] SAC_20251206013150: epoch=30 step=30000 epoch=30 metrics={'time_sample_batch': 0.00482443356513977, 'time_algorithm_update': 0.020360800981521605, 'critic_loss': 359447802.128, 'actor_loss': -76515.763984375, 'temp': 966.5035280761718, 'temp_loss': -129.01148499107362, 'time_step': 0.025447522401809693, 'td_error': 265182418.53252625, 'value_scale': 84432.44069572506, 'discounted_advantage': -132068.9022582682, 'initial_state': 82587.375, 'diff_eval': 103330.44609716532} step=30000
2025-12-06 01:46.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_30000.d3


Epoch 31/200: 100%|██████████| 1000/1000 [00:26<00:00, 37.09it/s, critic_loss=5.93e+8, actor_loss=-9.65e+4, temp=1.19e+3, temp_loss=-114]


2025-12-06 01:47.04 [info     ] SAC_20251206013150: epoch=31 step=31000 epoch=31 metrics={'time_sample_batch': 0.004876244068145752, 'time_algorithm_update': 0.02129708766937256, 'critic_loss': 594663654.976, 'actor_loss': -96634.096578125, 'temp': 1192.3375493164062, 'temp_loss': -114.52050010490417, 'time_step': 0.026449957370758056, 'td_error': 434956664.74379194, 'value_scale': 105167.56643571878, 'discounted_advantage': -174330.6139943425, 'initial_state': 102634.8125, 'diff_eval': 102566.40347335335} step=31000
2025-12-06 01:47.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_31000.d3


Epoch 32/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.98it/s, critic_loss=9.68e+8, actor_loss=-1.21e+5, temp=1.46e+3, temp_loss=-110]


2025-12-06 01:47.33 [info     ] SAC_20251206013150: epoch=32 step=32000 epoch=32 metrics={'time_sample_batch': 0.00487752366065979, 'time_algorithm_update': 0.020041759729385374, 'critic_loss': 969917069.504, 'actor_loss': -121300.5068515625, 'temp': 1456.504509033203, 'temp_loss': -109.03746306419373, 'time_step': 0.02519477915763855, 'td_error': 669405790.8406675, 'value_scale': 131684.8235999057, 'discounted_advantage': -205527.08382701155, 'initial_state': 128847.921875, 'diff_eval': 104890.64753513584} step=32000
2025-12-06 01:47.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_32000.d3


Epoch 33/200: 100%|██████████| 1000/1000 [00:26<00:00, 37.06it/s, critic_loss=1.55e+9, actor_loss=-1.51e+5, temp=1.76e+3, temp_loss=-119]


2025-12-06 01:48.03 [info     ] SAC_20251206013150: epoch=33 step=33000 epoch=33 metrics={'time_sample_batch': 0.004984254121780395, 'time_algorithm_update': 0.021242379188537597, 'critic_loss': 1550005667.904, 'actor_loss': -150702.271296875, 'temp': 1763.977423828125, 'temp_loss': -120.64254411411285, 'time_step': 0.02650995421409607, 'td_error': 1041020369.8755108, 'value_scale': 161908.05149832356, 'discounted_advantage': -260199.72173502584, 'initial_state': 159209.984375, 'diff_eval': 103235.23789066041} step=33000
2025-12-06 01:48.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_33000.d3


Epoch 34/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.78it/s, critic_loss=2.41e+9, actor_loss=-1.86e+5, temp=2.13e+3, temp_loss=-131]


2025-12-06 01:48.33 [info     ] SAC_20251206013150: epoch=34 step=34000 epoch=34 metrics={'time_sample_batch': 0.005221692323684693, 'time_algorithm_update': 0.019823763132095336, 'critic_loss': 2415242470.784, 'actor_loss': -186355.6475, 'temp': 2134.002765991211, 'temp_loss': -130.81953254985808, 'time_step': 0.025322901010513307, 'td_error': 1586858774.6630998, 'value_scale': 198718.22812761946, 'discounted_advantage': -314172.70192059263, 'initial_state': 195479.5, 'diff_eval': 104104.86620767199} step=34000
2025-12-06 01:48.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_34000.d3


Epoch 35/200: 100%|██████████| 1000/1000 [00:27<00:00, 36.42it/s, critic_loss=3.79e+9, actor_loss=-2.3e+5, temp=2.59e+3, temp_loss=-159]


2025-12-06 01:49.04 [info     ] SAC_20251206013150: epoch=35 step=35000 epoch=35 metrics={'time_sample_batch': 0.006343769550323486, 'time_algorithm_update': 0.020422868728637696, 'critic_loss': 3793347667.2, 'actor_loss': -230132.342234375, 'temp': 2587.713899658203, 'temp_loss': -157.28154508018494, 'time_step': 0.027034271478652953, 'td_error': 2413088533.063836, 'value_scale': 245235.0275369342, 'discounted_advantage': -375871.98131940054, 'initial_state': 243237.265625, 'diff_eval': 104024.04168260527} step=35000
2025-12-06 01:49.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_35000.d3


Epoch 36/200: 100%|██████████| 1000/1000 [00:26<00:00, 37.87it/s, critic_loss=5.91e+9, actor_loss=-2.82e+5, temp=3.11e+3, temp_loss=-182]


2025-12-06 01:49.34 [info     ] SAC_20251206013150: epoch=36 step=36000 epoch=36 metrics={'time_sample_batch': 0.004949815511703491, 'time_algorithm_update': 0.020732844114303588, 'critic_loss': 5925091848.192, 'actor_loss': -282615.0639375, 'temp': 3110.9067653808593, 'temp_loss': -178.51031745529176, 'time_step': 0.025947494506835936, 'td_error': 3592864721.849107, 'value_scale': 295963.60715501884, 'discounted_advantage': -463666.47539181274, 'initial_state': 294119.0, 'diff_eval': 103576.8909385129} step=36000
2025-12-06 01:49.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_36000.d3


Epoch 37/200: 100%|██████████| 1000/1000 [00:26<00:00, 38.27it/s, critic_loss=9.05e+9, actor_loss=-3.45e+5, temp=3.71e+3, temp_loss=-214]


2025-12-06 01:50.04 [info     ] SAC_20251206013150: epoch=37 step=37000 epoch=37 metrics={'time_sample_batch': 0.0049903509616851804, 'time_algorithm_update': 0.020384315252304077, 'critic_loss': 9073386571.264, 'actor_loss': -345517.79084375, 'temp': 3716.8802309570315, 'temp_loss': -212.70463620948792, 'time_step': 0.025647949934005737, 'td_error': 5357296398.156565, 'value_scale': 359614.7373218776, 'discounted_advantage': -561865.012916262, 'initial_state': 360063.25, 'diff_eval': 103973.07525736174} step=37000
2025-12-06 01:50.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_37000.d3


Epoch 38/200: 100%|██████████| 1000/1000 [00:26<00:00, 37.39it/s, critic_loss=1.37e+10, actor_loss=-4.21e+5, temp=4.46e+3, temp_loss=-247]


2025-12-06 01:50.34 [info     ] SAC_20251206013150: epoch=38 step=38000 epoch=38 metrics={'time_sample_batch': 0.0050978131294250485, 'time_algorithm_update': 0.020880698680877687, 'critic_loss': 13699648658.432, 'actor_loss': -421214.86796875, 'temp': 4461.830748291016, 'temp_loss': -248.30055790138243, 'time_step': 0.02624748134613037, 'td_error': 8045600390.752207, 'value_scale': 436417.56580050295, 'discounted_advantage': -689477.3175048528, 'initial_state': 440329.125, 'diff_eval': 104456.24572651264} step=38000
2025-12-06 01:50.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_38000.d3


Epoch 39/200: 100%|██████████| 1000/1000 [00:26<00:00, 38.24it/s, critic_loss=2.05e+10, actor_loss=-5.1e+5, temp=5.31e+3, temp_loss=-271]


2025-12-06 01:51.04 [info     ] SAC_20251206013150: epoch=39 step=39000 epoch=39 metrics={'time_sample_batch': 0.004974012613296509, 'time_algorithm_update': 0.02043487548828125, 'critic_loss': 20515988179.968, 'actor_loss': -510771.249, 'temp': 5316.508034667969, 'temp_loss': -270.1747244758606, 'time_step': 0.025672984838485718, 'td_error': 11403337643.053541, 'value_scale': 522236.2804510687, 'discounted_advantage': -794908.6732878075, 'initial_state': 529610.9375, 'diff_eval': 103797.95596063566} step=39000
2025-12-06 01:51.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_39000.d3


Epoch 40/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.47it/s, critic_loss=3.04e+10, actor_loss=-6.13e+5, temp=6.24e+3, temp_loss=-300]


2025-12-06 01:51.33 [info     ] SAC_20251206013150: epoch=40 step=40000 epoch=40 metrics={'time_sample_batch': 0.004884607315063476, 'time_algorithm_update': 0.020380685329437254, 'critic_loss': 30407537115.136, 'actor_loss': -613423.8895, 'temp': 6248.40474609375, 'temp_loss': -302.55023881530764, 'time_step': 0.02553400897979736, 'td_error': 16429474163.606455, 'value_scale': 623041.7608445096, 'discounted_advantage': -953408.5928095059, 'initial_state': 634552.3125, 'diff_eval': 104227.90715556612} step=40000
2025-12-06 01:51.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_40000.d3


Epoch 41/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.76it/s, critic_loss=4.43e+10, actor_loss=-7.32e+5, temp=7.32e+3, temp_loss=-328]


2025-12-06 01:52.02 [info     ] SAC_20251206013150: epoch=41 step=41000 epoch=41 metrics={'time_sample_batch': 0.00488929533958435, 'time_algorithm_update': 0.0201456880569458, 'critic_loss': 44413950334.976, 'actor_loss': -732079.730875, 'temp': 7326.446709472656, 'temp_loss': -331.9364145965576, 'time_step': 0.025304418087005617, 'td_error': 22898522781.256077, 'value_scale': 736093.267864627, 'discounted_advantage': -1112017.2864419185, 'initial_state': 754300.0, 'diff_eval': 103289.53360090511} step=41000
2025-12-06 01:52.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_41000.d3


Epoch 42/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.75it/s, critic_loss=6.23e+10, actor_loss=-8.64e+5, temp=8.45e+3, temp_loss=-347]


2025-12-06 01:52.32 [info     ] SAC_20251206013150: epoch=42 step=42000 epoch=42 metrics={'time_sample_batch': 0.004926863670349121, 'time_algorithm_update': 0.020142852067947387, 'critic_loss': 62451850428.416, 'actor_loss': -864675.40475, 'temp': 8453.880901367187, 'temp_loss': -353.52668559265135, 'time_step': 0.025337042570114137, 'td_error': 31413031028.864418, 'value_scale': 863913.1503562448, 'discounted_advantage': -1286136.2278735451, 'initial_state': 889414.875, 'diff_eval': 103288.99013605232} step=42000
2025-12-06 01:52.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_42000.d3


Epoch 43/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.40it/s, critic_loss=8.7e+10, actor_loss=-1.01e+6, temp=9.64e+3, temp_loss=-375] 


2025-12-06 01:53.01 [info     ] SAC_20251206013150: epoch=43 step=43000 epoch=43 metrics={'time_sample_batch': 0.004769905328750611, 'time_algorithm_update': 0.019916923999786378, 'critic_loss': 87108728127.488, 'actor_loss': -1011332.042625, 'temp': 9648.33844921875, 'temp_loss': -376.00755855560305, 'time_step': 0.024943832874298095, 'td_error': 43048494736.58966, 'value_scale': 1006050.4186137888, 'discounted_advantage': -1509021.4379814037, 'initial_state': 1041052.0625, 'diff_eval': 103591.98093355047} step=43000
2025-12-06 01:53.01 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_43000.d3


Epoch 44/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.18it/s, critic_loss=1.17e+11, actor_loss=-1.17e+6, temp=1.1e+4, temp_loss=-389]


2025-12-06 01:53.30 [info     ] SAC_20251206013150: epoch=44 step=44000 epoch=44 metrics={'time_sample_batch': 0.004828858137130737, 'time_algorithm_update': 0.01997840714454651, 'critic_loss': 117521126563.84, 'actor_loss': -1174554.201625, 'temp': 10984.1199765625, 'temp_loss': -387.1880316848755, 'time_step': 0.025075253009796142, 'td_error': 57095054872.42545, 'value_scale': 1161527.9470871752, 'discounted_advantage': -1693197.4807201447, 'initial_state': 1205104.625, 'diff_eval': 104324.7855781026} step=44000
2025-12-06 01:53.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_44000.d3


Epoch 45/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.90it/s, critic_loss=1.58e+11, actor_loss=-1.36e+6, temp=1.25e+4, temp_loss=-486]


2025-12-06 01:53.59 [info     ] SAC_20251206013150: epoch=45 step=45000 epoch=45 metrics={'time_sample_batch': 0.00489513111114502, 'time_algorithm_update': 0.020076180934906006, 'critic_loss': 158622463016.96, 'actor_loss': -1356919.065625, 'temp': 12484.063506835937, 'temp_loss': -486.94136779785157, 'time_step': 0.025235528230667114, 'td_error': 76083980960.4309, 'value_scale': 1336148.8922359599, 'discounted_advantage': -1961989.7038795995, 'initial_state': 1391094.5, 'diff_eval': 103419.99510697991} step=45000
2025-12-06 01:53.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_45000.d3


Epoch 46/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.75it/s, critic_loss=2.14e+11, actor_loss=-1.56e+6, temp=1.42e+4, temp_loss=-525] 


2025-12-06 01:54.29 [info     ] SAC_20251206013150: epoch=46 step=46000 epoch=46 metrics={'time_sample_batch': 0.0048830447196960445, 'time_algorithm_update': 0.020191341161727907, 'critic_loss': 213928674222.08, 'actor_loss': -1562639.0985, 'temp': 14162.833955078126, 'temp_loss': -523.1738421096802, 'time_step': 0.025335998773574828, 'td_error': 100736233868.18253, 'value_scale': 1535026.5103730091, 'discounted_advantage': -2240234.688018978, 'initial_state': 1603864.625, 'diff_eval': 103589.89134136795} step=46000
2025-12-06 01:54.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_46000.d3


Epoch 47/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.55it/s, critic_loss=2.87e+11, actor_loss=-1.79e+6, temp=1.61e+4, temp_loss=-573]


2025-12-06 01:54.58 [info     ] SAC_20251206013150: epoch=47 step=47000 epoch=47 metrics={'time_sample_batch': 0.004900535583496094, 'time_algorithm_update': 0.02028601884841919, 'critic_loss': 287948804456.448, 'actor_loss': -1794304.584125, 'temp': 16062.931252929688, 'temp_loss': -575.4199585952758, 'time_step': 0.025452650547027587, 'td_error': 133384670813.27861, 'value_scale': 1757537.3442476948, 'discounted_advantage': -2617143.0665433104, 'initial_state': 1847220.75, 'diff_eval': 103956.793966997} step=47000
2025-12-06 01:54.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_47000.d3


Epoch 48/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.33it/s, critic_loss=3.76e+11, actor_loss=-2.05e+6, temp=1.81e+4, temp_loss=-544]


2025-12-06 01:55.27 [info     ] SAC_20251206013150: epoch=48 step=48000 epoch=48 metrics={'time_sample_batch': 0.004779150724411011, 'time_algorithm_update': 0.0199408061504364, 'critic_loss': 376796718563.328, 'actor_loss': -2052842.631125, 'temp': 18118.37799609375, 'temp_loss': -546.9884220123291, 'time_step': 0.024978049755096435, 'td_error': 171898842945.4476, 'value_scale': 1997162.3761001676, 'discounted_advantage': -2934470.6185058886, 'initial_state': 2105437.0, 'diff_eval': 103455.73109172858} step=48000
2025-12-06 01:55.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_48000.d3


Epoch 49/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.93it/s, critic_loss=4.92e+11, actor_loss=-2.33e+6, temp=2.01e+4, temp_loss=-610]


2025-12-06 01:55.56 [info     ] SAC_20251206013150: epoch=49 step=49000 epoch=49 metrics={'time_sample_batch': 0.004879902124404908, 'time_algorithm_update': 0.020068145513534547, 'critic_loss': 492733707812.864, 'actor_loss': -2334416.26525, 'temp': 20123.4844375, 'temp_loss': -608.1381882171631, 'time_step': 0.025213851690292357, 'td_error': 223010010856.9145, 'value_scale': 2263579.609283319, 'discounted_advantage': -3368926.804422475, 'initial_state': 2395113.25, 'diff_eval': 103828.9501928044} step=49000
2025-12-06 01:55.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_49000.d3


Epoch 50/200: 100%|██████████| 1000/1000 [00:26<00:00, 38.42it/s, critic_loss=6.22e+11, actor_loss=-2.64e+6, temp=2.23e+4, temp_loss=-573] 


2025-12-06 01:56.26 [info     ] SAC_20251206013150: epoch=50 step=50000 epoch=50 metrics={'time_sample_batch': 0.004938039541244507, 'time_algorithm_update': 0.02031588339805603, 'critic_loss': 622302755651.584, 'actor_loss': -2641155.9385, 'temp': 22291.06236328125, 'temp_loss': -580.6436820831299, 'time_step': 0.02553301811218262, 'td_error': 281125088723.6077, 'value_scale': 2549151.664658424, 'discounted_advantage': -3718057.3653389737, 'initial_state': 2706817.25, 'diff_eval': 103994.84890360404} step=50000
2025-12-06 01:56.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_50000.d3


Epoch 51/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.83it/s, critic_loss=7.96e+11, actor_loss=-2.96e+6, temp=2.44e+4, temp_loss=-600] 


2025-12-06 01:56.55 [info     ] SAC_20251206013150: epoch=51 step=51000 epoch=51 metrics={'time_sample_batch': 0.004835224866867065, 'time_algorithm_update': 0.020195715427398682, 'critic_loss': 797425747820.544, 'actor_loss': -2966139.2865, 'temp': 24384.41636328125, 'temp_loss': -611.4081342163086, 'time_step': 0.025296704292297363, 'td_error': 351752076238.2496, 'value_scale': 2857774.4524308466, 'discounted_advantage': -4082895.8706997433, 'initial_state': 3043471.0, 'diff_eval': 104012.3736261478} step=51000
2025-12-06 01:56.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_51000.d3


Epoch 52/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.74it/s, critic_loss=1.01e+12, actor_loss=-3.32e+6, temp=2.67e+4, temp_loss=-734]


2025-12-06 01:57.25 [info     ] SAC_20251206013150: epoch=52 step=52000 epoch=52 metrics={'time_sample_batch': 0.0049658997058868405, 'time_algorithm_update': 0.020129396438598634, 'critic_loss': 1011606521839.616, 'actor_loss': -3318789.3615, 'temp': 26739.474419921877, 'temp_loss': -732.3131472320557, 'time_step': 0.025351634025573732, 'td_error': 440123254526.8832, 'value_scale': 3190117.518545683, 'discounted_advantage': -4585799.20065818, 'initial_state': 3411546.0, 'diff_eval': 104233.43354911877} step=52000
2025-12-06 01:57.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_52000.d3


Epoch 53/200: 100%|██████████| 1000/1000 [00:24<00:00, 40.12it/s, critic_loss=1.26e+12, actor_loss=-3.7e+6, temp=2.93e+4, temp_loss=-719] 


2025-12-06 01:57.53 [info     ] SAC_20251206013150: epoch=53 step=53000 epoch=53 metrics={'time_sample_batch': 0.0046867549419403074, 'time_algorithm_update': 0.01957107090950012, 'critic_loss': 1260791224860.672, 'actor_loss': -3701442.182, 'temp': 29359.788453125, 'temp_loss': -719.2520771484375, 'time_step': 0.024505483865737913, 'td_error': 543742075794.755, 'value_scale': 3541674.2560771164, 'discounted_advantage': -5080278.291711098, 'initial_state': 3791397.5, 'diff_eval': 103661.5579837245} step=53000
2025-12-06 01:57.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_53000.d3


Epoch 54/200: 100%|██████████| 1000/1000 [00:26<00:00, 38.41it/s, critic_loss=1.57e+12, actor_loss=-4.11e+6, temp=3.21e+4, temp_loss=-781]


2025-12-06 01:58.23 [info     ] SAC_20251206013150: epoch=54 step=54000 epoch=54 metrics={'time_sample_batch': 0.004889052152633667, 'time_algorithm_update': 0.020393986940383912, 'critic_loss': 1569582987411.456, 'actor_loss': -4114020.01575, 'temp': 32078.32815625, 'temp_loss': -797.9944076690674, 'time_step': 0.02554476523399353, 'td_error': 673404592657.3427, 'value_scale': 3923755.1067686505, 'discounted_advantage': -5721494.987482593, 'initial_state': 4207010.5, 'diff_eval': 104172.14639653104} step=54000
2025-12-06 01:58.23 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_54000.d3


Epoch 55/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.64it/s, critic_loss=1.96e+12, actor_loss=-4.56e+6, temp=3.53e+4, temp_loss=-960]   


2025-12-06 01:58.52 [info     ] SAC_20251206013150: epoch=55 step=55000 epoch=55 metrics={'time_sample_batch': 0.004879457950592041, 'time_algorithm_update': 0.02025859808921814, 'critic_loss': 1957914487816.192, 'actor_loss': -4564333.4535, 'temp': 35272.36484375, 'temp_loss': -954.3714852600098, 'time_step': 0.02539896035194397, 'td_error': 829787075046.4276, 'value_scale': 4351405.46605197, 'discounted_advantage': -6359213.376332267, 'initial_state': 4683763.0, 'diff_eval': 104232.1122216291} step=55000
2025-12-06 01:58.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_55000.d3


Epoch 56/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.48it/s, critic_loss=2.38e+12, actor_loss=-5.06e+6, temp=3.87e+4, temp_loss=-932]   


2025-12-06 01:59.22 [info     ] SAC_20251206013150: epoch=56 step=56000 epoch=56 metrics={'time_sample_batch': 0.004972974061965943, 'time_algorithm_update': 0.020238921403884887, 'critic_loss': 2384773466292.224, 'actor_loss': -5058176.03, 'temp': 38695.244625, 'temp_loss': -919.2350683898926, 'time_step': 0.025506520986557008, 'td_error': 1019073438323.8357, 'value_scale': 4820415.887992456, 'discounted_advantage': -6994546.409589203, 'initial_state': 5200756.0, 'diff_eval': 103992.87411563894} step=56000
2025-12-06 01:59.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_56000.d3


Epoch 57/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.29it/s, critic_loss=2.95e+12, actor_loss=-5.59e+6, temp=4.24e+4, temp_loss=-1.03e+3]


2025-12-06 01:59.51 [info     ] SAC_20251206013150: epoch=57 step=57000 epoch=57 metrics={'time_sample_batch': 0.004717670440673828, 'time_algorithm_update': 0.02001044201850891, 'critic_loss': 2950622234017.792, 'actor_loss': -5595803.6845, 'temp': 42458.03899609375, 'temp_loss': -1025.5278464813232, 'time_step': 0.024994762897491456, 'td_error': 1244116224473.8188, 'value_scale': 5322047.076802179, 'discounted_advantage': -7718440.985565115, 'initial_state': 5761321.0, 'diff_eval': 103977.40901380932} step=57000
2025-12-06 01:59.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_57000.d3


Epoch 58/200: 100%|██████████| 1000/1000 [00:26<00:00, 38.12it/s, critic_loss=3.53e+12, actor_loss=-6.16e+6, temp=4.59e+4, temp_loss=-906]   


2025-12-06 02:00.21 [info     ] SAC_20251206013150: epoch=58 step=58000 epoch=58 metrics={'time_sample_batch': 0.0049462392330169675, 'time_algorithm_update': 0.02049400186538696, 'critic_loss': 3531994870972.416, 'actor_loss': -6161326.197, 'temp': 45915.4358671875, 'temp_loss': -905.8814846801757, 'time_step': 0.02572914695739746, 'td_error': 1492702135937.9104, 'value_scale': 5839106.686818944, 'discounted_advantage': -8375352.58646334, 'initial_state': 6339776.0, 'diff_eval': 103194.73163698944} step=58000
2025-12-06 02:00.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_58000.d3


Epoch 59/200: 100%|██████████| 1000/1000 [00:26<00:00, 38.07it/s, critic_loss=4.31e+12, actor_loss=-6.75e+6, temp=4.95e+4, temp_loss=-1.1e+3]


2025-12-06 02:00.51 [info     ] SAC_20251206013150: epoch=59 step=59000 epoch=59 metrics={'time_sample_batch': 0.005015922546386719, 'time_algorithm_update': 0.0204807505607605, 'critic_loss': 4312581734137.856, 'actor_loss': -6750966.732, 'temp': 49502.6755, 'temp_loss': -1064.0025108947755, 'time_step': 0.025774369955062867, 'td_error': 1780070343284.1426, 'value_scale': 6391091.374476111, 'discounted_advantage': -9000597.772337869, 'initial_state': 6952627.0, 'diff_eval': 103297.95283329811} step=59000
2025-12-06 02:00.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_59000.d3


Epoch 60/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.60it/s, critic_loss=5.16e+12, actor_loss=-7.37e+6, temp=5.33e+4, temp_loss=-1.12e+3]


2025-12-06 02:01.20 [info     ] SAC_20251206013150: epoch=60 step=60000 epoch=60 metrics={'time_sample_batch': 0.004911027669906616, 'time_algorithm_update': 0.020245360136032106, 'critic_loss': 5164687843655.68, 'actor_loss': -7372733.083, 'temp': 53365.27419921875, 'temp_loss': -1121.5241086120604, 'time_step': 0.02542498517036438, 'td_error': 2137445316699.7922, 'value_scale': 6970030.1246856665, 'discounted_advantage': -10034851.737243252, 'initial_state': 7600562.0, 'diff_eval': 103474.64004134222} step=60000
2025-12-06 02:01.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_60000.d3


Epoch 61/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.63it/s, critic_loss=6.16e+12, actor_loss=-8.04e+6, temp=5.78e+4, temp_loss=-1.23e+3]


2025-12-06 02:01.50 [info     ] SAC_20251206013150: epoch=61 step=61000 epoch=61 metrics={'time_sample_batch': 0.0048694326877594, 'time_algorithm_update': 0.020248331785202026, 'critic_loss': 6169813381808.128, 'actor_loss': -8040047.3125, 'temp': 57857.508109375, 'temp_loss': -1233.0546516723632, 'time_step': 0.02539665412902832, 'td_error': 2528684584990.794, 'value_scale': 7596158.694048617, 'discounted_advantage': -10729033.99576079, 'initial_state': 8289993.5, 'diff_eval': 103495.21976841755} step=61000
2025-12-06 02:01.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_61000.d3


Epoch 62/200: 100%|██████████| 1000/1000 [00:26<00:00, 37.98it/s, critic_loss=7.28e+12, actor_loss=-8.76e+6, temp=6.25e+4, temp_loss=-1.36e+3]


2025-12-06 02:02.19 [info     ] SAC_20251206013150: epoch=62 step=62000 epoch=62 metrics={'time_sample_batch': 0.0049531726837158205, 'time_algorithm_update': 0.020534889459609986, 'critic_loss': 7277357060587.52, 'actor_loss': -8759531.458, 'temp': 62514.99060546875, 'temp_loss': -1357.9813348999023, 'time_step': 0.02578199768066406, 'td_error': 3001454889231.3633, 'value_scale': 8272420.06496228, 'discounted_advantage': -11679850.717469031, 'initial_state': 9057430.0, 'diff_eval': 103501.02408605284} step=62000
2025-12-06 02:02.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_62000.d3


Epoch 63/200: 100%|██████████| 1000/1000 [00:26<00:00, 38.30it/s, critic_loss=8.67e+12, actor_loss=-9.52e+6, temp=6.76e+4, temp_loss=-1.47e+3]


2025-12-06 02:02.49 [info     ] SAC_20251206013150: epoch=63 step=63000 epoch=63 metrics={'time_sample_batch': 0.004951483011245728, 'time_algorithm_update': 0.02035657286643982, 'critic_loss': 8682484225540.096, 'actor_loss': -9518940.073, 'temp': 67638.98137109375, 'temp_loss': -1453.932622253418, 'time_step': 0.02559579873085022, 'td_error': 3536986591543.7617, 'value_scale': 8971673.466890194, 'discounted_advantage': -12695391.228245782, 'initial_state': 9841571.0, 'diff_eval': 103360.15075260331} step=63000
2025-12-06 02:02.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_63000.d3


Epoch 64/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.40it/s, critic_loss=1.04e+13, actor_loss=-1.03e+7, temp=7.35e+4, temp_loss=-1.72e+3]


2025-12-06 02:03.18 [info     ] SAC_20251206013150: epoch=64 step=64000 epoch=64 metrics={'time_sample_batch': 0.00474782919883728, 'time_algorithm_update': 0.019893588781356813, 'critic_loss': 10361138997886.977, 'actor_loss': -10339758.018, 'temp': 73519.3117265625, 'temp_loss': -1739.0129404296874, 'time_step': 0.024905847549438476, 'td_error': 4206790570799.1753, 'value_scale': 9770758.233025985, 'discounted_advantage': -13888810.918406136, 'initial_state': 10751747.0, 'diff_eval': 103854.42889041413} step=64000
2025-12-06 02:03.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_64000.d3


Epoch 65/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.69it/s, critic_loss=1.21e+13, actor_loss=-1.12e+7, temp=7.94e+4, temp_loss=-1.46e+3]


2025-12-06 02:03.47 [info     ] SAC_20251206013150: epoch=65 step=65000 epoch=65 metrics={'time_sample_batch': 0.004957338571548462, 'time_algorithm_update': 0.02012699007987976, 'critic_loss': 12096472130519.04, 'actor_loss': -11233663.79, 'temp': 79383.2861015625, 'temp_loss': -1473.4984002685546, 'time_step': 0.025362766981124878, 'td_error': 4958255503913.637, 'value_scale': 10597501.031852473, 'discounted_advantage': -15047864.530408055, 'initial_state': 11677083.0, 'diff_eval': 103904.46478020166} step=65000
2025-12-06 02:03.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_65000.d3


Epoch 66/200: 100%|██████████| 1000/1000 [00:26<00:00, 38.16it/s, critic_loss=1.43e+13, actor_loss=-1.22e+7, temp=8.53e+4, temp_loss=-1.66e+3]


2025-12-06 02:04.17 [info     ] SAC_20251206013150: epoch=66 step=66000 epoch=66 metrics={'time_sample_batch': 0.00496750283241272, 'time_algorithm_update': 0.02046159553527832, 'critic_loss': 14330014606557.184, 'actor_loss': -12168656.051, 'temp': 85339.4318828125, 'temp_loss': -1678.629072998047, 'time_step': 0.025699733018875123, 'td_error': 5802175721192.089, 'value_scale': 11465772.969195306, 'discounted_advantage': -16243022.450247357, 'initial_state': 12669452.0, 'diff_eval': 103553.56119032209} step=66000
2025-12-06 02:04.17 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_66000.d3


Epoch 67/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.78it/s, critic_loss=1.68e+13, actor_loss=-1.32e+7, temp=9.18e+4, temp_loss=-1.82e+3]


2025-12-06 02:04.45 [info     ] SAC_20251206013150: epoch=67 step=67000 epoch=67 metrics={'time_sample_batch': 0.004741086721420288, 'time_algorithm_update': 0.019665353775024413, 'critic_loss': 16868198814056.447, 'actor_loss': -13161921.928, 'temp': 91787.43509375, 'temp_loss': -1851.3827266845703, 'time_step': 0.024679626941680907, 'td_error': 6807979351487.041, 'value_scale': 12401302.071668064, 'discounted_advantage': -17675767.15234685, 'initial_state': 13744780.0, 'diff_eval': 103637.2191854225} step=67000
2025-12-06 02:04.46 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_67000.d3


Epoch 68/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.01it/s, critic_loss=1.97e+13, actor_loss=-1.42e+7, temp=9.9e+4, temp_loss=-2.01e+3]


2025-12-06 02:05.15 [info     ] SAC_20251206013150: epoch=68 step=68000 epoch=68 metrics={'time_sample_batch': 0.004819790363311768, 'time_algorithm_update': 0.02008599591255188, 'critic_loss': 19709679158427.65, 'actor_loss': -14219560.396, 'temp': 99029.1813984375, 'temp_loss': -2021.3008309936524, 'time_step': 0.02518292498588562, 'td_error': 7912708344479.815, 'value_scale': 13390539.235959765, 'discounted_advantage': -18810900.350198552, 'initial_state': 14868156.0, 'diff_eval': 103147.0185176244} step=68000
2025-12-06 02:05.15 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_68000.d3


Epoch 69/200: 100%|██████████| 1000/1000 [00:26<00:00, 38.19it/s, critic_loss=2.3e+13, actor_loss=-1.53e+7, temp=1.06e+5, temp_loss=-1.82e+3]


2025-12-06 02:05.44 [info     ] SAC_20251206013150: epoch=69 step=69000 epoch=69 metrics={'time_sample_batch': 0.004997212886810303, 'time_algorithm_update': 0.02043925666809082, 'critic_loss': 22994911585370.113, 'actor_loss': -15354363.496, 'temp': 106126.096484375, 'temp_loss': -1768.523647216797, 'time_step': 0.02571528148651123, 'td_error': 9246924319867.943, 'value_scale': 14463250.531852473, 'discounted_advantage': -20345282.204327945, 'initial_state': 16090486.0, 'diff_eval': 104126.80616162189} step=69000
2025-12-06 02:05.45 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_69000.d3


Epoch 70/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.47it/s, critic_loss=2.66e+13, actor_loss=-1.65e+7, temp=1.14e+5, temp_loss=-2.08e+3]


2025-12-06 02:06.14 [info     ] SAC_20251206013150: epoch=70 step=70000 epoch=70 metrics={'time_sample_batch': 0.0049050416946411135, 'time_algorithm_update': 0.020335505962371825, 'critic_loss': 26632949146320.895, 'actor_loss': -16553002.542, 'temp': 113589.358078125, 'temp_loss': -2037.2085393066407, 'time_step': 0.02550618290901184, 'td_error': 10742766425279.752, 'value_scale': 15590369.636630343, 'discounted_advantage': -21887048.602903996, 'initial_state': 17376948.0, 'diff_eval': 103432.31680756257} step=70000
2025-12-06 02:06.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_70000.d3


Epoch 71/200: 100%|██████████| 1000/1000 [00:26<00:00, 38.10it/s, critic_loss=3.11e+13, actor_loss=-1.78e+7, temp=1.21e+5, temp_loss=-2.04e+3]


2025-12-06 02:06.44 [info     ] SAC_20251206013150: epoch=71 step=71000 epoch=71 metrics={'time_sample_batch': 0.005137211322784424, 'time_algorithm_update': 0.02037364959716797, 'critic_loss': 31093674814210.047, 'actor_loss': -17804003.476, 'temp': 121242.2991953125, 'temp_loss': -2094.787863708496, 'time_step': 0.025787488460540773, 'td_error': 12502372538151.055, 'value_scale': 16771952.471919531, 'discounted_advantage': -23810734.449064445, 'initial_state': 18746504.0, 'diff_eval': 104127.40370993322} step=71000
2025-12-06 02:06.44 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_71000.d3


Epoch 72/200: 100%|██████████| 1000/1000 [00:27<00:00, 36.75it/s, critic_loss=3.59e+13, actor_loss=-1.91e+7, temp=1.29e+5, temp_loss=-2.16e+3]


2025-12-06 02:07.15 [info     ] SAC_20251206013150: epoch=72 step=72000 epoch=72 metrics={'time_sample_batch': 0.005270916223526001, 'time_algorithm_update': 0.021129169702529906, 'critic_loss': 35927328109363.2, 'actor_loss': -19117486.53, 'temp': 129319.304296875, 'temp_loss': -2146.7235654907226, 'time_step': 0.0267056519985199, 'td_error': 14366475190133.783, 'value_scale': 18002361.17015926, 'discounted_advantage': -25333715.93276403, 'initial_state': 20163198.0, 'diff_eval': 104021.55086476685} step=72000
2025-12-06 02:07.15 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_72000.d3


Epoch 73/200: 100%|██████████| 1000/1000 [00:26<00:00, 38.04it/s, critic_loss=4.13e+13, actor_loss=-2.05e+7, temp=1.38e+5, temp_loss=-2.36e+3]


2025-12-06 02:07.44 [info     ] SAC_20251206013150: epoch=73 step=73000 epoch=73 metrics={'time_sample_batch': 0.0049182016849517824, 'time_algorithm_update': 0.02059755039215088, 'critic_loss': 41317594363330.56, 'actor_loss': -20501367.776, 'temp': 137658.93359375, 'temp_loss': -2316.2058315429686, 'time_step': 0.025792040348052978, 'td_error': 16606583866721.875, 'value_scale': 19308797.224643756, 'discounted_advantage': -27465373.38736251, 'initial_state': 21661434.0, 'diff_eval': 104061.86470258895} step=73000
2025-12-06 02:07.44 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_73000.d3


Epoch 74/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.58it/s, critic_loss=4.76e+13, actor_loss=-2.2e+7, temp=1.47e+5, temp_loss=-2.64e+3]


2025-12-06 02:08.14 [info     ] SAC_20251206013150: epoch=74 step=74000 epoch=74 metrics={'time_sample_batch': 0.004879995107650757, 'time_algorithm_update': 0.020304630517959596, 'critic_loss': 47671227476606.98, 'actor_loss': -21964066.128, 'temp': 146918.680609375, 'temp_loss': -2677.2470601196287, 'time_step': 0.02546819806098938, 'td_error': 19065903322025.15, 'value_scale': 20706728.002514668, 'discounted_advantage': -29232555.363777738, 'initial_state': 23273210.0, 'diff_eval': 103725.92203307072} step=74000
2025-12-06 02:08.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_74000.d3


Epoch 75/200: 100%|██████████| 1000/1000 [00:26<00:00, 37.43it/s, critic_loss=5.42e+13, actor_loss=-2.35e+7, temp=1.56e+5, temp_loss=-2.44e+3]


2025-12-06 02:08.44 [info     ] SAC_20251206013150: epoch=75 step=75000 epoch=75 metrics={'time_sample_batch': 0.004983683109283447, 'time_algorithm_update': 0.020981478214263917, 'critic_loss': 54216612852531.2, 'actor_loss': -23533759.862, 'temp': 156031.6473125, 'temp_loss': -2390.2425900878907, 'time_step': 0.02622203016281128, 'td_error': 21878907395551.73, 'value_scale': 22183069.134953897, 'discounted_advantage': -31174542.974445965, 'initial_state': 24963476.0, 'diff_eval': 103821.93896893064} step=75000
2025-12-06 02:08.44 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_75000.d3


Epoch 76/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.88it/s, critic_loss=6.25e+13, actor_loss=-2.52e+7, temp=1.66e+5, temp_loss=-2.81e+3]


2025-12-06 02:09.13 [info     ] SAC_20251206013150: epoch=76 step=76000 epoch=76 metrics={'time_sample_batch': 0.004843172073364258, 'time_algorithm_update': 0.020117341041564943, 'critic_loss': 62554558194253.82, 'actor_loss': -25171072.014, 'temp': 165763.270296875, 'temp_loss': -2834.272781616211, 'time_step': 0.025241085052490234, 'td_error': 25124550421240.957, 'value_scale': 23726281.676445935, 'discounted_advantage': -33672967.20154567, 'initial_state': 26764956.0, 'diff_eval': 103415.65116400986} step=76000
2025-12-06 02:09.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_76000.d3


Epoch 77/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.69it/s, critic_loss=7.09e+13, actor_loss=-2.69e+7, temp=1.76e+5, temp_loss=-2.89e+3]


2025-12-06 02:09.43 [info     ] SAC_20251206013150: epoch=77 step=77000 epoch=77 metrics={'time_sample_batch': 0.004912219524383545, 'time_algorithm_update': 0.020173250913619996, 'critic_loss': 70953576490336.25, 'actor_loss': -26893447.446, 'temp': 176180.92278125, 'temp_loss': -2840.1379631347654, 'time_step': 0.025381163597106935, 'td_error': 28690393695711.793, 'value_scale': 25380429.913243923, 'discounted_advantage': -35646475.59083266, 'initial_state': 28681950.0, 'diff_eval': 104126.31308364829} step=77000
2025-12-06 02:09.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_77000.d3


Epoch 78/200: 100%|██████████| 1000/1000 [00:26<00:00, 38.33it/s, critic_loss=8.14e+13, actor_loss=-2.87e+7, temp=1.87e+5, temp_loss=-3.13e+3]


2025-12-06 02:10.12 [info     ] SAC_20251206013150: epoch=78 step=78000 epoch=78 metrics={'time_sample_batch': 0.004941749334335327, 'time_algorithm_update': 0.020413826704025268, 'critic_loss': 81406487745789.95, 'actor_loss': -28680690.22, 'temp': 187123.117484375, 'temp_loss': -3205.5889197387696, 'time_step': 0.02563410496711731, 'td_error': 32787407229300.05, 'value_scale': 27110459.340318523, 'discounted_advantage': -38097421.890879884, 'initial_state': 30698046.0, 'diff_eval': 104065.107041801} step=78000
2025-12-06 02:10.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_78000.d3


Epoch 79/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.44it/s, critic_loss=9.32e+13, actor_loss=-3.05e+7, temp=1.98e+5, temp_loss=-3.19e+3]


2025-12-06 02:10.41 [info     ] SAC_20251206013150: epoch=79 step=79000 epoch=79 metrics={'time_sample_batch': 0.004683019638061524, 'time_algorithm_update': 0.01995042324066162, 'critic_loss': 93303729020731.39, 'actor_loss': -30548843.552, 'temp': 198043.171625, 'temp_loss': -3151.373014404297, 'time_step': 0.024902140378952026, 'td_error': 37275372316681.0, 'value_scale': 28882020.42078793, 'discounted_advantage': -40726310.27371386, 'initial_state': 32766576.0, 'diff_eval': 103208.59626752573} step=79000
2025-12-06 02:10.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_79000.d3


Epoch 80/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.67it/s, critic_loss=1.05e+14, actor_loss=-3.25e+7, temp=2.1e+5, temp_loss=-3.35e+3]


2025-12-06 02:11.11 [info     ] SAC_20251206013150: epoch=80 step=80000 epoch=80 metrics={'time_sample_batch': 0.0048806297779083255, 'time_algorithm_update': 0.02022889018058777, 'critic_loss': 104826882469396.48, 'actor_loss': -32530679.43, 'temp': 209898.807984375, 'temp_loss': -3367.842537963867, 'time_step': 0.025379942178726196, 'td_error': 42370149239106.14, 'value_scale': 30774636.56244761, 'discounted_advantage': -43480819.300017044, 'initial_state': 34971864.0, 'diff_eval': 103160.13051065915} step=80000
2025-12-06 02:11.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_80000.d3


Epoch 81/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.57it/s, critic_loss=1.19e+14, actor_loss=-3.46e+7, temp=2.23e+5, temp_loss=-3.47e+3]


2025-12-06 02:11.40 [info     ] SAC_20251206013150: epoch=81 step=81000 epoch=81 metrics={'time_sample_batch': 0.004909586668014526, 'time_algorithm_update': 0.020223618745803834, 'critic_loss': 118785012442071.05, 'actor_loss': -34604278.962, 'temp': 223000.179109375, 'temp_loss': -3513.30608984375, 'time_step': 0.02541862630844116, 'td_error': 47986502266042.39, 'value_scale': 32783961.930427495, 'discounted_advantage': -45926875.410465084, 'initial_state': 37317812.0, 'diff_eval': 103160.95714777213} step=81000
2025-12-06 02:11.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_81000.d3


Epoch 82/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.25it/s, critic_loss=1.34e+14, actor_loss=-3.68e+7, temp=2.36e+5, temp_loss=-3.18e+3]


2025-12-06 02:12.09 [info     ] SAC_20251206013150: epoch=82 step=82000 epoch=82 metrics={'time_sample_batch': 0.0048308145999908444, 'time_algorithm_update': 0.01996184778213501, 'critic_loss': 133960075185225.73, 'actor_loss': -36780524.596, 'temp': 235955.62328125, 'temp_loss': -3263.0347138671873, 'time_step': 0.025045668125152586, 'td_error': 54371794198033.34, 'value_scale': 34874353.218776196, 'discounted_advantage': -48865780.33070337, 'initial_state': 39748240.0, 'diff_eval': 104189.96204046231} step=82000
2025-12-06 02:12.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_82000.d3


Epoch 83/200: 100%|██████████| 1000/1000 [00:26<00:00, 38.20it/s, critic_loss=1.52e+14, actor_loss=-3.91e+7, temp=2.5e+5, temp_loss=-4.05e+3]


2025-12-06 02:12.39 [info     ] SAC_20251206013150: epoch=83 step=83000 epoch=83 metrics={'time_sample_batch': 0.005043383359909058, 'time_algorithm_update': 0.020354668378829956, 'critic_loss': 152250644227424.25, 'actor_loss': -39067713.52, 'temp': 249589.434546875, 'temp_loss': -4071.188485961914, 'time_step': 0.02567264151573181, 'td_error': 61532229913962.59, 'value_scale': 37044480.073763624, 'discounted_advantage': -52456318.05043118, 'initial_state': 42297988.0, 'diff_eval': 103318.82859545421} step=83000
2025-12-06 02:12.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_83000.d3


Epoch 84/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.74it/s, critic_loss=1.7e+14, actor_loss=-4.15e+7, temp=2.64e+5, temp_loss=-3.52e+3]


2025-12-06 02:13.08 [info     ] SAC_20251206013150: epoch=84 step=84000 epoch=84 metrics={'time_sample_batch': 0.004902800321578979, 'time_algorithm_update': 0.020175175666809084, 'critic_loss': 169802819916791.8, 'actor_loss': -41475359.876, 'temp': 264011.692140625, 'temp_loss': -3552.481873840332, 'time_step': 0.025355177402496338, 'td_error': 69266936313808.17, 'value_scale': 39375502.76697402, 'discounted_advantage': -54892770.57947917, 'initial_state': 45027808.0, 'diff_eval': 103189.88729676716} step=84000
2025-12-06 02:13.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_84000.d3


Epoch 85/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.56it/s, critic_loss=1.93e+14, actor_loss=-4.4e+7, temp=2.79e+5, temp_loss=-4.4e+3] 


2025-12-06 02:13.37 [info     ] SAC_20251206013150: epoch=85 step=85000 epoch=85 metrics={'time_sample_batch': 0.004718693733215332, 'time_algorithm_update': 0.019847746610641478, 'critic_loss': 193070376182349.8, 'actor_loss': -43968451.952, 'temp': 278884.61359375, 'temp_loss': -4396.377390502929, 'time_step': 0.02482176613807678, 'td_error': 78258665555173.34, 'value_scale': 41821529.90025147, 'discounted_advantage': -58354814.55796438, 'initial_state': 47896516.0, 'diff_eval': 103487.51095635333} step=85000
2025-12-06 02:13.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_85000.d3


Epoch 86/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.02it/s, critic_loss=2.16e+14, actor_loss=-4.66e+7, temp=2.95e+5, temp_loss=-4.45e+3]


2025-12-06 02:14.06 [info     ] SAC_20251206013150: epoch=86 step=86000 epoch=86 metrics={'time_sample_batch': 0.004802491903305054, 'time_algorithm_update': 0.02010030746459961, 'critic_loss': 215875691877498.88, 'actor_loss': -46634966.98, 'temp': 295470.7584375, 'temp_loss': -4448.222018554688, 'time_step': 0.02517785668373108, 'td_error': 88081316795395.83, 'value_scale': 44339096.975691535, 'discounted_advantage': -61996131.24279307, 'initial_state': 50852504.0, 'diff_eval': 103490.96480973049} step=86000
2025-12-06 02:14.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_86000.d3


Epoch 87/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.63it/s, critic_loss=2.44e+14, actor_loss=-4.94e+7, temp=3.12e+5, temp_loss=-4.12e+3]


2025-12-06 02:14.35 [info     ] SAC_20251206013150: epoch=87 step=87000 epoch=87 metrics={'time_sample_batch': 0.004714790105819702, 'time_algorithm_update': 0.019824781894683837, 'critic_loss': 243748098621308.94, 'actor_loss': -49364437.628, 'temp': 312026.844625, 'temp_loss': -4286.241649169922, 'time_step': 0.024794233798980714, 'td_error': 98876494912243.84, 'value_scale': 46971473.62615256, 'discounted_advantage': -65705589.184369825, 'initial_state': 53954736.0, 'diff_eval': 103830.28266688285} step=87000
2025-12-06 02:14.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_87000.d3


Epoch 88/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.67it/s, critic_loss=2.7e+14, actor_loss=-5.22e+7, temp=3.28e+5, temp_loss=-4.69e+3]


2025-12-06 02:15.04 [info     ] SAC_20251206013150: epoch=88 step=88000 epoch=88 metrics={'time_sample_batch': 0.004918213129043579, 'time_algorithm_update': 0.0201784245967865, 'critic_loss': 270364416451543.03, 'actor_loss': -52213608.032, 'temp': 328419.968, 'temp_loss': -4587.396402709961, 'time_step': 0.02536510133743286, 'td_error': 110697011573884.64, 'value_scale': 49742002.650461026, 'discounted_advantage': -69060246.98893511, 'initial_state': 57223644.0, 'diff_eval': 104020.62282095011} step=88000
2025-12-06 02:15.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_88000.d3


Epoch 89/200: 100%|██████████| 1000/1000 [00:26<00:00, 38.35it/s, critic_loss=3.03e+14, actor_loss=-5.52e+7, temp=3.46e+5, temp_loss=-4.98e+3]


2025-12-06 02:15.34 [info     ] SAC_20251206013150: epoch=89 step=89000 epoch=89 metrics={'time_sample_batch': 0.004882051229476929, 'time_algorithm_update': 0.02042098331451416, 'critic_loss': 302826743016718.3, 'actor_loss': -55193402.52, 'temp': 346492.96778125, 'temp_loss': -5051.549407226563, 'time_step': 0.025580142974853514, 'td_error': 124020231011249.19, 'value_scale': 52556321.03855826, 'discounted_advantage': -73650999.56115927, 'initial_state': 60511968.0, 'diff_eval': 103561.82666635953} step=89000
2025-12-06 02:15.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_89000.d3


Epoch 90/200: 100%|██████████| 1000/1000 [00:26<00:00, 38.37it/s, critic_loss=3.4e+14, actor_loss=-5.83e+7, temp=3.64e+5, temp_loss=-4.85e+3]


2025-12-06 02:16.03 [info     ] SAC_20251206013150: epoch=90 step=90000 epoch=90 metrics={'time_sample_batch': 0.0049567966461181645, 'time_algorithm_update': 0.020345864057540894, 'critic_loss': 339844018303139.8, 'actor_loss': -58283100.532, 'temp': 364348.9174375, 'temp_loss': -4782.203926757813, 'time_step': 0.0255782949924469, 'td_error': 138483287563149.62, 'value_scale': 55513306.9815591, 'discounted_advantage': -77810094.32018901, 'initial_state': 64026320.0, 'diff_eval': 103905.50180805298} step=90000
2025-12-06 02:16.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_90000.d3


Epoch 91/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.31it/s, critic_loss=3.73e+14, actor_loss=-6.15e+7, temp=3.83e+5, temp_loss=-5.05e+3]


2025-12-06 02:16.32 [info     ] SAC_20251206013150: epoch=91 step=91000 epoch=91 metrics={'time_sample_batch': 0.004771322011947632, 'time_algorithm_update': 0.01995271897315979, 'critic_loss': 373389307727052.8, 'actor_loss': -61477386.38, 'temp': 383218.94071875, 'temp_loss': -4977.610045043945, 'time_step': 0.024991253614425658, 'td_error': 153793202591417.6, 'value_scale': 58567381.76697402, 'discounted_advantage': -81607537.70584632, 'initial_state': 67677136.0, 'diff_eval': 103425.56691823601} step=91000
2025-12-06 02:16.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_91000.d3


Epoch 92/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.67it/s, critic_loss=4.19e+14, actor_loss=-6.47e+7, temp=4.04e+5, temp_loss=-5.59e+3]


2025-12-06 02:17.02 [info     ] SAC_20251206013150: epoch=92 step=92000 epoch=92 metrics={'time_sample_batch': 0.004714361667633057, 'time_algorithm_update': 0.020427331686019897, 'critic_loss': 418919606720135.2, 'actor_loss': -64761049.736, 'temp': 403959.0306875, 'temp_loss': -5485.0373686523435, 'time_step': 0.02541416358947754, 'td_error': 170882665656402.03, 'value_scale': 61712569.04945516, 'discounted_advantage': -86225888.1374701, 'initial_state': 71434712.0, 'diff_eval': 103329.8359134587} step=92000
2025-12-06 02:17.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_92000.d3


Epoch 93/200: 100%|██████████| 1000/1000 [00:26<00:00, 37.12it/s, critic_loss=4.67e+14, actor_loss=-6.82e+7, temp=4.25e+5, temp_loss=-6.04e+3]


2025-12-06 02:17.32 [info     ] SAC_20251206013150: epoch=93 step=93000 epoch=93 metrics={'time_sample_batch': 0.00489491057395935, 'time_algorithm_update': 0.02130240869522095, 'critic_loss': 467240591684534.25, 'actor_loss': -68177678.744, 'temp': 424852.20096875, 'temp_loss': -5991.462104736328, 'time_step': 0.026479724168777467, 'td_error': 190555850361434.72, 'value_scale': 65025853.044425815, 'discounted_advantage': -91821916.28523055, 'initial_state': 75364928.0, 'diff_eval': 103521.50159450268} step=93000
2025-12-06 02:17.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_93000.d3


Epoch 94/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.28it/s, critic_loss=5.19e+14, actor_loss=-7.17e+7, temp=4.48e+5, temp_loss=-6.2e+3]


2025-12-06 02:18.01 [info     ] SAC_20251206013150: epoch=94 step=94000 epoch=94 metrics={'time_sample_batch': 0.004795510292053223, 'time_algorithm_update': 0.01990137195587158, 'critic_loss': 518978264169971.7, 'actor_loss': -71745096.144, 'temp': 448146.16840625, 'temp_loss': -6342.957250488281, 'time_step': 0.0249793963432312, 'td_error': 210429228292826.03, 'value_scale': 68499191.81056161, 'discounted_advantage': -95140837.84483264, 'initial_state': 79507704.0, 'diff_eval': 102985.17560507536} step=94000
2025-12-06 02:18.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_94000.d3


Epoch 95/200: 100%|██████████| 1000/1000 [00:27<00:00, 36.92it/s, critic_loss=5.65e+14, actor_loss=-7.54e+7, temp=4.7e+5, temp_loss=-5.22e+3]


2025-12-06 02:18.32 [info     ] SAC_20251206013150: epoch=95 step=95000 epoch=95 metrics={'time_sample_batch': 0.005036144256591797, 'time_algorithm_update': 0.02130557632446289, 'critic_loss': 565941250201485.2, 'actor_loss': -75434652.384, 'temp': 470099.5634375, 'temp_loss': -5116.802930419922, 'time_step': 0.02661721897125244, 'td_error': 232292450747660.56, 'value_scale': 72046680.4559933, 'discounted_advantage': -99304147.83871934, 'initial_state': 83744800.0, 'diff_eval': 103303.34406689127} step=95000
2025-12-06 02:18.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_95000.d3


Epoch 96/200: 100%|██████████| 1000/1000 [00:26<00:00, 37.96it/s, critic_loss=6.27e+14, actor_loss=-7.92e+7, temp=4.91e+5, temp_loss=-6.11e+3]


2025-12-06 02:19.03 [info     ] SAC_20251206013150: epoch=96 step=96000 epoch=96 metrics={'time_sample_batch': 0.005119186401367187, 'time_algorithm_update': 0.02045941710472107, 'critic_loss': 627185374912839.6, 'actor_loss': -79229123.872, 'temp': 491490.704125, 'temp_loss': -6214.419341308594, 'time_step': 0.025850682497024536, 'td_error': 257115671057748.0, 'value_scale': 75668489.984912, 'discounted_advantage': -105245151.80672455, 'initial_state': 88018144.0, 'diff_eval': 103212.71115119639} step=96000
2025-12-06 02:19.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_96000.d3


Epoch 97/200: 100%|██████████| 1000/1000 [00:26<00:00, 38.44it/s, critic_loss=6.95e+14, actor_loss=-8.32e+7, temp=5.17e+5, temp_loss=-6.51e+3]


2025-12-06 02:19.32 [info     ] SAC_20251206013150: epoch=97 step=97000 epoch=97 metrics={'time_sample_batch': 0.0049013736248016354, 'time_algorithm_update': 0.020353689908981322, 'critic_loss': 695727704246321.1, 'actor_loss': -83200780.776, 'temp': 516675.67978125, 'temp_loss': -6630.9262976074215, 'time_step': 0.02551845574378967, 'td_error': 283891024038209.4, 'value_scale': 79474715.49036044, 'discounted_advantage': -110757686.52131358, 'initial_state': 92570256.0, 'diff_eval': 103820.62285260283} step=97000
2025-12-06 02:19.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_97000.d3


Epoch 98/200: 100%|██████████| 1000/1000 [00:26<00:00, 37.87it/s, critic_loss=7.7e+14, actor_loss=-8.73e+7, temp=5.43e+5, temp_loss=-7.53e+3]


2025-12-06 02:20.02 [info     ] SAC_20251206013150: epoch=98 step=98000 epoch=98 metrics={'time_sample_batch': 0.004887277364730835, 'time_algorithm_update': 0.020759628534317016, 'critic_loss': 770056781496844.2, 'actor_loss': -87280648.944, 'temp': 543431.161, 'temp_loss': -7449.866757080078, 'time_step': 0.025923987865447997, 'td_error': 312240725960567.8, 'value_scale': 83448148.3470243, 'discounted_advantage': -115454649.40918614, 'initial_state': 97325664.0, 'diff_eval': 102566.00761908728} step=98000
2025-12-06 02:20.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_98000.d3


Epoch 99/200: 100%|██████████| 1000/1000 [00:26<00:00, 38.10it/s, critic_loss=8.34e+14, actor_loss=-9.16e+7, temp=5.69e+5, temp_loss=-6.35e+3]


2025-12-06 02:20.32 [info     ] SAC_20251206013150: epoch=99 step=99000 epoch=99 metrics={'time_sample_batch': 0.00488387131690979, 'time_algorithm_update': 0.020579935789108276, 'critic_loss': 833572907266342.9, 'actor_loss': -91629506.448, 'temp': 569358.3740625, 'temp_loss': -6321.10086352539, 'time_step': 0.02574904465675354, 'td_error': 343906014394541.2, 'value_scale': 87642274.23973177, 'discounted_advantage': -120338910.51179953, 'initial_state': 102336288.0, 'diff_eval': 103253.54537287382} step=99000
2025-12-06 02:20.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_99000.d3


Epoch 100/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.77it/s, critic_loss=9.21e+14, actor_loss=-9.6e+7, temp=5.96e+5, temp_loss=-8.02e+3]


2025-12-06 02:21.01 [info     ] SAC_20251206013150: epoch=100 step=100000 epoch=100 metrics={'time_sample_batch': 0.004711879014968872, 'time_algorithm_update': 0.01968242073059082, 'critic_loss': 921916803985178.6, 'actor_loss': -96034003.776, 'temp': 596447.8358125, 'temp_loss': -7564.548312988281, 'time_step': 0.024674662828445434, 'td_error': 379315097844473.56, 'value_scale': 91959393.28751048, 'discounted_advantage': -126963600.80435956, 'initial_state': 107493456.0, 'diff_eval': 103504.72503548356} step=100000
2025-12-06 02:21.01 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_100000.d3


Epoch 101/200: 100%|██████████| 1000/1000 [00:26<00:00, 38.08it/s, critic_loss=1.02e+15, actor_loss=-1.01e+8, temp=6.28e+5, temp_loss=-8.3e+3]


2025-12-06 02:21.31 [info     ] SAC_20251206013150: epoch=101 step=101000 epoch=101 metrics={'time_sample_batch': 0.004954002857208252, 'time_algorithm_update': 0.020564191341400147, 'critic_loss': 1021848521092890.6, 'actor_loss': -100724582.96, 'temp': 627744.6036875, 'temp_loss': -8494.134161865235, 'time_step': 0.025779287099838257, 'td_error': 417669106527984.94, 'value_scale': 96435680.51299246, 'discounted_advantage': -133571139.47169764, 'initial_state': 112845576.0, 'diff_eval': 103403.85707766132} step=101000
2025-12-06 02:21.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_101000.d3


Epoch 102/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.41it/s, critic_loss=1.11e+15, actor_loss=-1.06e+8, temp=6.6e+5, temp_loss=-7.98e+3]


2025-12-06 02:22.00 [info     ] SAC_20251206013150: epoch=102 step=102000 epoch=102 metrics={'time_sample_batch': 0.004784158706665039, 'time_algorithm_update': 0.019896348476409913, 'critic_loss': 1115257481266724.9, 'actor_loss': -105548000.088, 'temp': 659638.7510625, 'temp_loss': -7889.284901123046, 'time_step': 0.024939584732055663, 'td_error': 458490690015459.8, 'value_scale': 101139740.37384744, 'discounted_advantage': -138977217.1859936, 'initial_state': 118544360.0, 'diff_eval': 103081.46243063043} step=102000
2025-12-06 02:22.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_102000.d3


Epoch 103/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.09it/s, critic_loss=1.23e+15, actor_loss=-1.1e+8, temp=6.89e+5, temp_loss=-8.42e+3]


2025-12-06 02:22.29 [info     ] SAC_20251206013150: epoch=103 step=103000 epoch=103 metrics={'time_sample_batch': 0.004864042043685913, 'time_algorithm_update': 0.019988869190216065, 'critic_loss': 1230389150583095.2, 'actor_loss': -110509404.424, 'temp': 689387.2828125, 'temp_loss': -8198.645247802735, 'time_step': 0.025121254205703734, 'td_error': 503752295240576.94, 'value_scale': 105880266.23134954, 'discounted_advantage': -146577268.8736401, 'initial_state': 124249888.0, 'diff_eval': 103055.22223562926} step=103000
2025-12-06 02:22.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_103000.d3


Epoch 104/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.70it/s, critic_loss=1.36e+15, actor_loss=-1.16e+8, temp=7.23e+5, temp_loss=-9.55e+3]


2025-12-06 02:22.57 [info     ] SAC_20251206013150: epoch=104 step=104000 epoch=104 metrics={'time_sample_batch': 0.004710756301879883, 'time_algorithm_update': 0.01980975365638733, 'critic_loss': 1359904241379967.0, 'actor_loss': -115650196.16, 'temp': 723086.1684375, 'temp_loss': -9830.183859130859, 'time_step': 0.024774587392807006, 'td_error': 552730258013117.5, 'value_scale': 110769926.3285834, 'discounted_advantage': -154527908.8130731, 'initial_state': 130188080.0, 'diff_eval': 103319.59591756582} step=104000
2025-12-06 02:22.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_104000.d3


Epoch 105/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.34it/s, critic_loss=1.46e+15, actor_loss=-1.21e+8, temp=7.58e+5, temp_loss=-7.84e+3]


2025-12-06 02:23.26 [info     ] SAC_20251206013150: epoch=105 step=105000 epoch=105 metrics={'time_sample_batch': 0.004682348251342773, 'time_algorithm_update': 0.020020084142684937, 'critic_loss': 1459307441958158.2, 'actor_loss': -120949976.496, 'temp': 758336.6275625, 'temp_loss': -7838.448228027344, 'time_step': 0.024965173482894898, 'td_error': 602341293881044.2, 'value_scale': 115937807.22548197, 'discounted_advantage': -158613162.0248156, 'initial_state': 136394608.0, 'diff_eval': 103427.17867870603} step=105000
2025-12-06 02:23.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_105000.d3


Epoch 106/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.78it/s, critic_loss=1.6e+15, actor_loss=-1.26e+8, temp=7.89e+5, temp_loss=-9.2e+3] 


2025-12-06 02:23.56 [info     ] SAC_20251206013150: epoch=106 step=106000 epoch=106 metrics={'time_sample_batch': 0.0048890807628631594, 'time_algorithm_update': 0.02010415554046631, 'critic_loss': 1599216779752636.5, 'actor_loss': -126388219.472, 'temp': 789435.7451875, 'temp_loss': -9114.406718261718, 'time_step': 0.025284422159194946, 'td_error': 657830493364517.9, 'value_scale': 121147713.13327745, 'discounted_advantage': -165938112.18305477, 'initial_state': 142665888.0, 'diff_eval': 102652.21020072085} step=106000
2025-12-06 02:23.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_106000.d3


Epoch 107/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.39it/s, critic_loss=1.74e+15, actor_loss=-1.32e+8, temp=8.25e+5, temp_loss=-1.01e+4]


2025-12-06 02:24.25 [info     ] SAC_20251206013150: epoch=107 step=107000 epoch=107 metrics={'time_sample_batch': 0.00478748893737793, 'time_algorithm_update': 0.019877393245697022, 'critic_loss': 1740520538138214.5, 'actor_loss': -132061726.736, 'temp': 824952.04675, 'temp_loss': -10005.196465332032, 'time_step': 0.024932853698730467, 'td_error': 718505374839860.1, 'value_scale': 126580596.72757754, 'discounted_advantage': -173797576.0723572, 'initial_state': 149189728.0, 'diff_eval': 102589.55617655673} step=107000
2025-12-06 02:24.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_107000.d3


Epoch 108/200: 100%|██████████| 1000/1000 [00:26<00:00, 38.42it/s, critic_loss=1.92e+15, actor_loss=-1.38e+8, temp=8.65e+5, temp_loss=-1.08e+4]


2025-12-06 02:24.54 [info     ] SAC_20251206013150: epoch=108 step=108000 epoch=108 metrics={'time_sample_batch': 0.004950325727462768, 'time_algorithm_update': 0.020304137229919434, 'critic_loss': 1922227779592519.8, 'actor_loss': -137942472.016, 'temp': 864811.952, 'temp_loss': -11010.864170898438, 'time_step': 0.025531608819961547, 'td_error': 786426909641518.9, 'value_scale': 132317319.3176865, 'discounted_advantage': -182600635.53706124, 'initial_state': 156154016.0, 'diff_eval': 102605.33539114334} step=108000
2025-12-06 02:24.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_108000.d3


Epoch 109/200: 100%|██████████| 1000/1000 [00:24<00:00, 40.80it/s, critic_loss=2.09e+15, actor_loss=-1.44e+8, temp=9.07e+5, temp_loss=-1.08e+4]


2025-12-06 02:25.22 [info     ] SAC_20251206013150: epoch=109 step=109000 epoch=109 metrics={'time_sample_batch': 0.0044642996788024905, 'time_algorithm_update': 0.019340275764465332, 'critic_loss': 2095676620214894.5, 'actor_loss': -144051077.472, 'temp': 907477.3534375, 'temp_loss': -10755.442725097657, 'time_step': 0.02408511018753052, 'td_error': 858377855452173.1, 'value_scale': 138223869.87761945, 'discounted_advantage': -190594833.41248935, 'initial_state': 163325952.0, 'diff_eval': 102923.05730865587} step=109000
2025-12-06 02:25.23 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_109000.d3


Epoch 110/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.75it/s, critic_loss=2.27e+15, actor_loss=-1.5e+8, temp=9.47e+5, temp_loss=-9.87e+3]


2025-12-06 02:25.52 [info     ] SAC_20251206013150: epoch=110 step=110000 epoch=110 metrics={'time_sample_batch': 0.004911237001419068, 'time_algorithm_update': 0.02014941954612732, 'critic_loss': 2272138254834729.0, 'actor_loss': -150406003.152, 'temp': 947532.485375, 'temp_loss': -9906.46096875, 'time_step': 0.025334076404571533, 'td_error': 933286845189941.1, 'value_scale': 144141532.1609388, 'discounted_advantage': -198595946.2534617, 'initial_state': 170484048.0, 'diff_eval': 102956.51523856925} step=110000
2025-12-06 02:25.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_110000.d3


Epoch 111/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.31it/s, critic_loss=2.48e+15, actor_loss=-1.57e+8, temp=9.86e+5, temp_loss=-1.19e+4]


2025-12-06 02:26.21 [info     ] SAC_20251206013150: epoch=111 step=111000 epoch=111 metrics={'time_sample_batch': 0.00474468183517456, 'time_algorithm_update': 0.019958582401275633, 'critic_loss': 2481802358950985.5, 'actor_loss': -156744546.928, 'temp': 986563.021, 'temp_loss': -11699.494032226563, 'time_step': 0.024985350608825684, 'td_error': 1018801247550751.1, 'value_scale': 150503692.92875105, 'discounted_advantage': -207893913.195574, 'initial_state': 178138416.0, 'diff_eval': 102701.69053032593} step=111000
2025-12-06 02:26.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_111000.d3


Epoch 112/200: 100%|██████████| 1000/1000 [00:26<00:00, 38.36it/s, critic_loss=2.7e+15, actor_loss=-1.63e+8, temp=1.03e+6, temp_loss=-1.22e+4]


2025-12-06 02:26.50 [info     ] SAC_20251206013150: epoch=112 step=112000 epoch=112 metrics={'time_sample_batch': 0.00499704647064209, 'time_algorithm_update': 0.02034546375274658, 'critic_loss': 2698114703208481.0, 'actor_loss': -163483695.536, 'temp': 1031421.3045625, 'temp_loss': -11905.763528808593, 'time_step': 0.025603511810302734, 'td_error': 1110023006475687.6, 'value_scale': 157068162.6823135, 'discounted_advantage': -217022706.44939148, 'initial_state': 186118592.0, 'diff_eval': 103176.14796432365} step=112000
2025-12-06 02:26.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_112000.d3


Epoch 113/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.72it/s, critic_loss=2.95e+15, actor_loss=-1.7e+8, temp=1.08e+6, temp_loss=-1.25e+4]


2025-12-06 02:27.20 [info     ] SAC_20251206013150: epoch=113 step=113000 epoch=113 metrics={'time_sample_batch': 0.004746340274810791, 'time_algorithm_update': 0.020385576963424683, 'critic_loss': 2951084726078669.0, 'actor_loss': -170507411.328, 'temp': 1078465.1955, 'temp_loss': -12497.249077148437, 'time_step': 0.025396457195281982, 'td_error': 1206496410261750.0, 'value_scale': 163764434.357083, 'discounted_advantage': -225976505.78022212, 'initial_state': 194217824.0, 'diff_eval': 102237.64609455223} step=113000
2025-12-06 02:27.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_113000.d3


Epoch 114/200: 100%|██████████| 1000/1000 [00:27<00:00, 36.07it/s, critic_loss=3.19e+15, actor_loss=-1.78e+8, temp=1.13e+6, temp_loss=-1.28e+4]


2025-12-06 02:27.51 [info     ] SAC_20251206013150: epoch=114 step=114000 epoch=114 metrics={'time_sample_batch': 0.004965969800949097, 'time_algorithm_update': 0.021971856832504272, 'critic_loss': 3193324923405730.0, 'actor_loss': -177738630.608, 'temp': 1126243.0225, 'temp_loss': -12916.942645507812, 'time_step': 0.027219210147857668, 'td_error': 1313509210454964.0, 'value_scale': 170888358.60854986, 'discounted_advantage': -235461805.72955185, 'initial_state': 202907136.0, 'diff_eval': 102930.1552693694} step=114000
2025-12-06 02:27.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_114000.d3


Epoch 115/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.59it/s, critic_loss=3.47e+15, actor_loss=-1.85e+8, temp=1.17e+6, temp_loss=-1.14e+4]


2025-12-06 02:28.20 [info     ] SAC_20251206013150: epoch=115 step=115000 epoch=115 metrics={'time_sample_batch': 0.004829621791839599, 'time_algorithm_update': 0.02034291172027588, 'critic_loss': 3473927003311177.5, 'actor_loss': -185186859.664, 'temp': 1173826.8555, 'temp_loss': -10973.177616699219, 'time_step': 0.025438348293304443, 'td_error': 1422976272942175.8, 'value_scale': 177881427.98994133, 'discounted_advantage': -244743576.4741525, 'initial_state': 211453136.0, 'diff_eval': 102572.24030715523} step=115000
2025-12-06 02:28.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_115000.d3


Epoch 116/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.76it/s, critic_loss=3.75e+15, actor_loss=-1.93e+8, temp=1.22e+6, temp_loss=-1.31e+4]


2025-12-06 02:28.50 [info     ] SAC_20251206013150: epoch=116 step=116000 epoch=116 metrics={'time_sample_batch': 0.00486889386177063, 'time_algorithm_update': 0.020190659761428833, 'critic_loss': 3746382337307312.0, 'actor_loss': -192726500.656, 'temp': 1219142.584875, 'temp_loss': -13413.660034179688, 'time_step': 0.02533379530906677, 'td_error': 1542892946628465.5, 'value_scale': 185172522.82984075, 'discounted_advantage': -254736190.66997206, 'initial_state': 220329600.0, 'diff_eval': 103047.56571923397} step=116000
2025-12-06 02:28.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_116000.d3


Epoch 117/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.78it/s, critic_loss=4.06e+15, actor_loss=-2e+8, temp=1.27e+6, temp_loss=-1.27e+4]  


2025-12-06 02:29.19 [info     ] SAC_20251206013150: epoch=117 step=117000 epoch=117 metrics={'time_sample_batch': 0.004914514541625977, 'time_algorithm_update': 0.020078223943710328, 'critic_loss': 4062207135440699.5, 'actor_loss': -200429318.096, 'temp': 1272234.76675, 'temp_loss': -13205.328547851563, 'time_step': 0.025286311864852907, 'td_error': 1665935755611356.0, 'value_scale': 192564378.3436714, 'discounted_advantage': -263633961.6613218, 'initial_state': 229382752.0, 'diff_eval': 102750.16468329242} step=117000
2025-12-06 02:29.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_117000.d3


Epoch 118/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.61it/s, critic_loss=4.37e+15, actor_loss=-2.08e+8, temp=1.32e+6, temp_loss=-1.28e+4]


2025-12-06 02:29.49 [info     ] SAC_20251206013150: epoch=118 step=118000 epoch=118 metrics={'time_sample_batch': 0.004887494564056396, 'time_algorithm_update': 0.020269289255142212, 'critic_loss': 4372596070627148.0, 'actor_loss': -208274422.208, 'temp': 1322137.5905, 'temp_loss': -12924.492784179687, 'time_step': 0.025429831504821778, 'td_error': 1798139882956628.8, 'value_scale': 200046497.56580052, 'discounted_advantage': -273911659.1088011, 'initial_state': 238486736.0, 'diff_eval': 102634.33461957725} step=118000
2025-12-06 02:29.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_118000.d3


Epoch 119/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.95it/s, critic_loss=4.74e+15, actor_loss=-2.16e+8, temp=1.38e+6, temp_loss=-1.49e+4]


2025-12-06 02:30.18 [info     ] SAC_20251206013150: epoch=119 step=119000 epoch=119 metrics={'time_sample_batch': 0.004844377756118775, 'time_algorithm_update': 0.02009751081466675, 'critic_loss': 4732861416002814.0, 'actor_loss': -216251059.744, 'temp': 1376293.563625, 'temp_loss': -14889.006452148438, 'time_step': 0.025206875801086426, 'td_error': 1940597012114309.2, 'value_scale': 207727810.387259, 'discounted_advantage': -285548223.2282592, 'initial_state': 247940000.0, 'diff_eval': 102305.78282462861} step=119000
2025-12-06 02:30.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_119000.d3


Epoch 120/200: 100%|██████████| 1000/1000 [00:26<00:00, 38.41it/s, critic_loss=5.1e+15, actor_loss=-2.25e+8, temp=1.43e+6, temp_loss=-1.36e+4]


2025-12-06 02:30.48 [info     ] SAC_20251206013150: epoch=120 step=120000 epoch=120 metrics={'time_sample_batch': 0.004938964366912842, 'time_algorithm_update': 0.020320285081863402, 'critic_loss': 5101708439432724.0, 'actor_loss': -224657196.672, 'temp': 1431084.247125, 'temp_loss': -13843.876827148437, 'time_step': 0.025533091306686403, 'td_error': 2094859628982705.0, 'value_scale': 215818320.52640402, 'discounted_advantage': -296472847.4460178, 'initial_state': 257774640.0, 'diff_eval': 102668.22984683324} step=120000
2025-12-06 02:30.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_120000.d3


Epoch 121/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.62it/s, critic_loss=5.51e+15, actor_loss=-2.33e+8, temp=1.49e+6, temp_loss=-1.58e+4]


2025-12-06 02:31.16 [info     ] SAC_20251206013150: epoch=121 step=121000 epoch=121 metrics={'time_sample_batch': 0.004721363306045532, 'time_algorithm_update': 0.01980668830871582, 'critic_loss': 5504336495994995.0, 'actor_loss': -233217198.992, 'temp': 1488615.12675, 'temp_loss': -15518.891146484375, 'time_step': 0.024800963163375853, 'td_error': 2260168149342593.0, 'value_scale': 224106522.12908635, 'discounted_advantage': -308446795.2358415, 'initial_state': 267888720.0, 'diff_eval': 102537.67371539837} step=121000
2025-12-06 02:31.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_121000.d3


Epoch 122/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.65it/s, critic_loss=5.94e+15, actor_loss=-2.42e+8, temp=1.55e+6, temp_loss=-1.57e+4]


2025-12-06 02:31.45 [info     ] SAC_20251206013150: epoch=122 step=122000 epoch=122 metrics={'time_sample_batch': 0.004805002927780151, 'time_algorithm_update': 0.019700937271118164, 'critic_loss': 5935530267836940.0, 'actor_loss': -242148190.24, 'temp': 1551048.99375, 'temp_loss': -15936.706399414063, 'time_step': 0.024779749631881714, 'td_error': 2444151678372701.5, 'value_scale': 232753820.82816428, 'discounted_advantage': -322702659.6521467, 'initial_state': 278435712.0, 'diff_eval': 102951.0018706054} step=122000
2025-12-06 02:31.45 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_122000.d3


Epoch 123/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.86it/s, critic_loss=6.39e+15, actor_loss=-2.51e+8, temp=1.62e+6, temp_loss=-1.63e+4]


2025-12-06 02:32.14 [info     ] SAC_20251206013150: epoch=123 step=123000 epoch=123 metrics={'time_sample_batch': 0.004817304372787476, 'time_algorithm_update': 0.020168712615966797, 'critic_loss': 6394542640028713.0, 'actor_loss': -251375000.336, 'temp': 1617143.361375, 'temp_loss': -17107.505720703124, 'time_step': 0.02525573134422302, 'td_error': 2634409402832985.0, 'value_scale': 241960859.94300085, 'discounted_advantage': -332155114.56191784, 'initial_state': 289799744.0, 'diff_eval': 103207.63089359776} step=123000
2025-12-06 02:32.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_123000.d3


Epoch 124/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.81it/s, critic_loss=6.97e+15, actor_loss=-2.61e+8, temp=1.68e+6, temp_loss=-1.85e+4]


2025-12-06 02:32.43 [info     ] SAC_20251206013150: epoch=124 step=124000 epoch=124 metrics={'time_sample_batch': 0.004883114576339722, 'time_algorithm_update': 0.020180030107498168, 'critic_loss': 6973911494824034.0, 'actor_loss': -260682876.288, 'temp': 1684872.634875, 'temp_loss': -18097.14004589844, 'time_step': 0.02533431005477905, 'td_error': 2838189415693174.5, 'value_scale': 250979834.04526404, 'discounted_advantage': -346076733.51467985, 'initial_state': 300876320.0, 'diff_eval': 102852.1415478058} step=124000
2025-12-06 02:32.44 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_124000.d3


Epoch 125/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.51it/s, critic_loss=7.44e+15, actor_loss=-2.7e+8, temp=1.75e+6, temp_loss=-1.66e+4]


2025-12-06 02:33.12 [info     ] SAC_20251206013150: epoch=125 step=125000 epoch=125 metrics={'time_sample_batch': 0.004769354581832886, 'time_algorithm_update': 0.019850972414016725, 'critic_loss': 7441975431940538.0, 'actor_loss': -270406726.384, 'temp': 1754347.405875, 'temp_loss': -16202.97110546875, 'time_step': 0.024881693840026857, 'td_error': 3050419740692806.5, 'value_scale': 260442931.64459345, 'discounted_advantage': -356335841.8665304, 'initial_state': 312485472.0, 'diff_eval': 102825.57051305579} step=125000
2025-12-06 02:33.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_125000.d3


Epoch 126/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.83it/s, critic_loss=8.02e+15, actor_loss=-2.8e+8, temp=1.82e+6, temp_loss=-1.76e+4]


2025-12-06 02:33.42 [info     ] SAC_20251206013150: epoch=126 step=126000 epoch=126 metrics={'time_sample_batch': 0.004827904462814331, 'time_algorithm_update': 0.020195473670959474, 'critic_loss': 8019476219433058.0, 'actor_loss': -280419678.112, 'temp': 1821328.436625, 'temp_loss': -17859.37854589844, 'time_step': 0.02529212713241577, 'td_error': 3280579360869151.0, 'value_scale': 270041451.473596, 'discounted_advantage': -369979463.36779, 'initial_state': 324284320.0, 'diff_eval': 102840.58043441469} step=126000
2025-12-06 02:33.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_126000.d3


Epoch 127/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.40it/s, critic_loss=8.62e+15, actor_loss=-2.91e+8, temp=1.89e+6, temp_loss=-1.85e+4]


2025-12-06 02:34.11 [info     ] SAC_20251206013150: epoch=127 step=127000 epoch=127 metrics={'time_sample_batch': 0.004805281162261963, 'time_algorithm_update': 0.019879806995391845, 'critic_loss': 8623737772399657.0, 'actor_loss': -290738860.16, 'temp': 1890033.99175, 'temp_loss': -18190.253857421874, 'time_step': 0.024942354679107666, 'td_error': 3524444106324344.0, 'value_scale': 279696823.9530595, 'discounted_advantage': -384698575.3361805, 'initial_state': 336131872.0, 'diff_eval': 102695.33015100115} step=127000
2025-12-06 02:34.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_127000.d3


Epoch 128/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.10it/s, critic_loss=9.27e+15, actor_loss=-3.01e+8, temp=1.96e+6, temp_loss=-1.89e+4]


2025-12-06 02:34.40 [info     ] SAC_20251206013150: epoch=128 step=128000 epoch=128 metrics={'time_sample_batch': 0.00481716513633728, 'time_algorithm_update': 0.020031190395355225, 'critic_loss': 9272712992397460.0, 'actor_loss': -300949190.944, 'temp': 1964920.597, 'temp_loss': -19520.101938476564, 'time_step': 0.025115646123886108, 'td_error': 3785330893386925.5, 'value_scale': 289814469.18692374, 'discounted_advantage': -399291628.39270705, 'initial_state': 348651520.0, 'diff_eval': 102842.55758495962} step=128000
2025-12-06 02:34.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_128000.d3


Epoch 129/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.56it/s, critic_loss=9.87e+15, actor_loss=-3.12e+8, temp=2.04e+6, temp_loss=-1.87e+4]


2025-12-06 02:35.09 [info     ] SAC_20251206013150: epoch=129 step=129000 epoch=129 metrics={'time_sample_batch': 0.004906749486923218, 'time_algorithm_update': 0.020273941040039063, 'critic_loss': 9877363728456876.0, 'actor_loss': -311764594.752, 'temp': 2036618.3335, 'temp_loss': -18365.015901367187, 'time_step': 0.025473196268081665, 'td_error': 4051017292919546.0, 'value_scale': 300051492.8952221, 'discounted_advantage': -411380410.1601625, 'initial_state': 361303488.0, 'diff_eval': 102195.48674207302} step=129000
2025-12-06 02:35.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_129000.d3


Epoch 130/200: 100%|██████████| 1000/1000 [00:26<00:00, 37.97it/s, critic_loss=1.06e+16, actor_loss=-3.23e+8, temp=2.11e+6, temp_loss=-2.05e+4]


2025-12-06 02:35.39 [info     ] SAC_20251206013150: epoch=130 step=130000 epoch=130 metrics={'time_sample_batch': 0.005008928060531616, 'time_algorithm_update': 0.020488909244537355, 'critic_loss': 1.0612189403956642e+16, 'actor_loss': -322640185.056, 'temp': 2113185.974125, 'temp_loss': -19633.633540039064, 'time_step': 0.025804142236709595, 'td_error': 4353070456093968.0, 'value_scale': 310620029.9446773, 'discounted_advantage': -428880219.920102, 'initial_state': 374219360.0, 'diff_eval': 103007.71682515908} step=130000
2025-12-06 02:35.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_130000.d3


Epoch 131/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.89it/s, critic_loss=1.14e+16, actor_loss=-3.34e+8, temp=2.19e+6, temp_loss=-2.21e+4]


2025-12-06 02:36.08 [info     ] SAC_20251206013150: epoch=131 step=131000 epoch=131 metrics={'time_sample_batch': 0.004857043981552124, 'time_algorithm_update': 0.020092350482940675, 'critic_loss': 1.1427836250392888e+16, 'actor_loss': -333829527.52, 'temp': 2192586.569, 'temp_loss': -22883.12051171875, 'time_step': 0.02523500084877014, 'td_error': 4662169713517743.0, 'value_scale': 321344222.24308467, 'discounted_advantage': -444459679.86850744, 'initial_state': 387456032.0, 'diff_eval': 103111.68612611105} step=131000
2025-12-06 02:36.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_131000.d3


Epoch 132/200: 100%|██████████| 1000/1000 [00:26<00:00, 37.66it/s, critic_loss=1.21e+16, actor_loss=-3.45e+8, temp=2.28e+6, temp_loss=-2.09e+4]


2025-12-06 02:36.38 [info     ] SAC_20251206013150: epoch=132 step=132000 epoch=132 metrics={'time_sample_batch': 0.005065869092941284, 'time_algorithm_update': 0.020722327470779418, 'critic_loss': 1.2107417891867984e+16, 'actor_loss': -345187499.232, 'temp': 2275388.36675, 'temp_loss': -20241.333138671875, 'time_step': 0.02605896592140198, 'td_error': 4977987305896331.0, 'value_scale': 332674275.9228835, 'discounted_advantage': -453928176.55211407, 'initial_state': 401441344.0, 'diff_eval': 102473.47034859318} step=132000
2025-12-06 02:36.38 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_132000.d3


Epoch 133/200: 100%|██████████| 1000/1000 [00:26<00:00, 37.89it/s, critic_loss=1.3e+16, actor_loss=-3.57e+8, temp=2.35e+6, temp_loss=-1.97e+4]


2025-12-06 02:37.08 [info     ] SAC_20251206013150: epoch=133 step=133000 epoch=133 metrics={'time_sample_batch': 0.00481289005279541, 'time_algorithm_update': 0.020863267421722412, 'critic_loss': 1.2967778241052934e+16, 'actor_loss': -357176231.072, 'temp': 2352025.67175, 'temp_loss': -20119.695196289063, 'time_step': 0.02594914650917053, 'td_error': 5326117234142164.0, 'value_scale': 343834153.9916178, 'discounted_advantage': -471694888.6215331, 'initial_state': 415174816.0, 'diff_eval': 102801.22575168058} step=133000
2025-12-06 02:37.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_133000.d3


Epoch 134/200: 100%|██████████| 1000/1000 [00:27<00:00, 36.94it/s, critic_loss=1.39e+16, actor_loss=-3.69e+8, temp=2.44e+6, temp_loss=-2.2e+4]


2025-12-06 02:37.39 [info     ] SAC_20251206013150: epoch=134 step=134000 epoch=134 metrics={'time_sample_batch': 0.004679478645324707, 'time_algorithm_update': 0.020716970682144165, 'critic_loss': 1.3900640126591042e+16, 'actor_loss': -368986959.104, 'temp': 2439007.2555, 'temp_loss': -22062.57799707031, 'time_step': 0.025687756061553955, 'td_error': 5693022561982931.0, 'value_scale': 355318274.0050293, 'discounted_advantage': -489020344.0678467, 'initial_state': 429391136.0, 'diff_eval': 102627.16572698463} step=134000
2025-12-06 02:37.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_134000.d3


Epoch 135/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.56it/s, critic_loss=1.48e+16, actor_loss=-3.81e+8, temp=2.52e+6, temp_loss=-2.24e+4]


2025-12-06 02:38.09 [info     ] SAC_20251206013150: epoch=135 step=135000 epoch=135 metrics={'time_sample_batch': 0.0047004673480987545, 'time_algorithm_update': 0.01983924984931946, 'critic_loss': 1.4842210789539972e+16, 'actor_loss': -381447181.216, 'temp': 2525244.18775, 'temp_loss': -22713.7118046875, 'time_step': 0.02481015157699585, 'td_error': 6088282557348234.0, 'value_scale': 367304688.9253982, 'discounted_advantage': -506494818.2995947, 'initial_state': 444268384.0, 'diff_eval': 102789.22126692478} step=135000
2025-12-06 02:38.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_135000.d3


Epoch 136/200: 100%|██████████| 1000/1000 [00:24<00:00, 41.65it/s, critic_loss=1.59e+16, actor_loss=-3.94e+8, temp=2.61e+6, temp_loss=-2.28e+4]


2025-12-06 02:38.36 [info     ] SAC_20251206013150: epoch=136 step=136000 epoch=136 metrics={'time_sample_batch': 0.004340436935424805, 'time_algorithm_update': 0.01902934956550598, 'critic_loss': 1.5884393714613748e+16, 'actor_loss': -393847476.608, 'temp': 2613510.019, 'temp_loss': -23444.930497070312, 'time_step': 0.023607041597366332, 'td_error': 6492302652020128.0, 'value_scale': 379410735.2958927, 'discounted_advantage': -521716862.1301405, 'initial_state': 459130400.0, 'diff_eval': 102712.52449166345} step=136000
2025-12-06 02:38.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_136000.d3


Epoch 137/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.25it/s, critic_loss=1.67e+16, actor_loss=-4.07e+8, temp=2.71e+6, temp_loss=-2.25e+4]


2025-12-06 02:39.05 [info     ] SAC_20251206013150: epoch=137 step=137000 epoch=137 metrics={'time_sample_batch': 0.0046058762073516845, 'time_algorithm_update': 0.02012421178817749, 'critic_loss': 1.6739825862630704e+16, 'actor_loss': -407014177.344, 'temp': 2705665.25025, 'temp_loss': -20889.42190234375, 'time_step': 0.02501840138435364, 'td_error': 6921799619504361.0, 'value_scale': 392056686.0284996, 'discounted_advantage': -536477208.8054096, 'initial_state': 474827200.0, 'diff_eval': 102675.62052803207} step=137000
2025-12-06 02:39.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_137000.d3


Epoch 138/200: 100%|██████████| 1000/1000 [00:26<00:00, 37.64it/s, critic_loss=1.8e+16, actor_loss=-4.2e+8, temp=2.8e+6, temp_loss=-2.57e+4]  


2025-12-06 02:39.37 [info     ] SAC_20251206013150: epoch=138 step=138000 epoch=138 metrics={'time_sample_batch': 0.004981153249740601, 'time_algorithm_update': 0.020682883024215698, 'critic_loss': 1.799206858044683e+16, 'actor_loss': -420473120.768, 'temp': 2799171.74575, 'temp_loss': -27479.49278515625, 'time_step': 0.025975910663604735, 'td_error': 7384174595885370.0, 'value_scale': 404743967.1751886, 'discounted_advantage': -554862559.5924993, 'initial_state': 490544128.0, 'diff_eval': 102421.24498054628} step=138000
2025-12-06 02:39.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_138000.d3


Epoch 139/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.03it/s, critic_loss=1.91e+16, actor_loss=-4.34e+8, temp=2.89e+6, temp_loss=-2.2e+4]


2025-12-06 02:40.06 [info     ] SAC_20251206013150: epoch=139 step=139000 epoch=139 metrics={'time_sample_batch': 0.004847428321838379, 'time_algorithm_update': 0.019977141618728638, 'critic_loss': 1.909882162562623e+16, 'actor_loss': -433814368.16, 'temp': 2894211.895, 'temp_loss': -22099.408662109374, 'time_step': 0.025118616580963135, 'td_error': 7856360728109023.0, 'value_scale': 417501121.1198659, 'discounted_advantage': -572165757.1774129, 'initial_state': 506371360.0, 'diff_eval': 102605.97978471091} step=139000
2025-12-06 02:40.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_139000.d3


Epoch 140/200: 100%|██████████| 1000/1000 [00:26<00:00, 38.24it/s, critic_loss=2.06e+16, actor_loss=-4.47e+8, temp=3e+6, temp_loss=-2.87e+4]  


2025-12-06 02:40.36 [info     ] SAC_20251206013150: epoch=140 step=140000 epoch=140 metrics={'time_sample_batch': 0.004965194463729858, 'time_algorithm_update': 0.020405006170272827, 'critic_loss': 2.0555171735103604e+16, 'actor_loss': -447299504.48, 'temp': 2996923.33225, 'temp_loss': -29910.048389648437, 'time_step': 0.02564844989776611, 'td_error': 8381064795184276.0, 'value_scale': 431037324.35205364, 'discounted_advantage': -591734664.6130855, 'initial_state': 523127360.0, 'diff_eval': 102502.64077158166} step=140000
2025-12-06 02:40.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_140000.d3


Epoch 141/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.12it/s, critic_loss=2.17e+16, actor_loss=-4.62e+8, temp=3.1e+6, temp_loss=-2.32e+4]


2025-12-06 02:41.05 [info     ] SAC_20251206013150: epoch=141 step=141000 epoch=141 metrics={'time_sample_batch': 0.0048324172496795655, 'time_algorithm_update': 0.020008866786956788, 'critic_loss': 2.1687042707344588e+16, 'actor_loss': -461580841.344, 'temp': 3100058.6835, 'temp_loss': -23063.465298828123, 'time_step': 0.025105829000473024, 'td_error': 8927515867610470.0, 'value_scale': 444808446.94719195, 'discounted_advantage': -610816363.1324601, 'initial_state': 540111808.0, 'diff_eval': 102485.79189987574} step=141000
2025-12-06 02:41.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_141000.d3


Epoch 142/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.79it/s, critic_loss=2.33e+16, actor_loss=-4.76e+8, temp=3.21e+6, temp_loss=-2.98e+4]


2025-12-06 02:41.34 [info     ] SAC_20251206013150: epoch=142 step=142000 epoch=142 metrics={'time_sample_batch': 0.004903406381607056, 'time_algorithm_update': 0.020138689517974852, 'critic_loss': 2.3300868188403664e+16, 'actor_loss': -476380805.088, 'temp': 3211488.49675, 'temp_loss': -29580.71685546875, 'time_step': 0.025316657543182373, 'td_error': 9512438932104458.0, 'value_scale': 458914331.3461861, 'discounted_advantage': -632099601.508081, 'initial_state': 557650432.0, 'diff_eval': 102584.05760420578} step=142000
2025-12-06 02:41.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_142000.d3


Epoch 143/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.77it/s, critic_loss=2.47e+16, actor_loss=-4.91e+8, temp=3.32e+6, temp_loss=-2.78e+4]


2025-12-06 02:42.04 [info     ] SAC_20251206013150: epoch=143 step=143000 epoch=143 metrics={'time_sample_batch': 0.004887152194976807, 'time_algorithm_update': 0.020119598388671875, 'critic_loss': 2.4683158680536876e+16, 'actor_loss': -491412951.36, 'temp': 3321388.288, 'temp_loss': -28698.83809375, 'time_step': 0.0252882342338562, 'td_error': 1.0119698471023094e+16, 'value_scale': 473544818.4610226, 'discounted_advantage': -649987675.9508342, 'initial_state': 575924928.0, 'diff_eval': 102280.17258187292} step=143000
2025-12-06 02:42.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_143000.d3


Epoch 144/200: 100%|██████████| 1000/1000 [00:26<00:00, 37.59it/s, critic_loss=2.61e+16, actor_loss=-5.07e+8, temp=3.43e+6, temp_loss=-2.79e+4]


2025-12-06 02:42.34 [info     ] SAC_20251206013150: epoch=144 step=144000 epoch=144 metrics={'time_sample_batch': 0.004909015655517578, 'time_algorithm_update': 0.02094088649749756, 'critic_loss': 2.614002833771253e+16, 'actor_loss': -507203154.624, 'temp': 3430875.8765, 'temp_loss': -28377.227064453124, 'time_step': 0.026122557640075685, 'td_error': 1.0756378176262182e+16, 'value_scale': 488323610.03855824, 'discounted_advantage': -668749002.4334115, 'initial_state': 594312000.0, 'diff_eval': 101841.72378355359} step=144000
2025-12-06 02:42.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_144000.d3


Epoch 145/200: 100%|██████████| 1000/1000 [00:26<00:00, 38.23it/s, critic_loss=2.79e+16, actor_loss=-5.23e+8, temp=3.55e+6, temp_loss=-2.96e+4]


2025-12-06 02:43.03 [info     ] SAC_20251206013150: epoch=145 step=145000 epoch=145 metrics={'time_sample_batch': 0.004848693609237671, 'time_algorithm_update': 0.02054682421684265, 'critic_loss': 2.7908972401674684e+16, 'actor_loss': -522716309.216, 'temp': 3549419.8665, 'temp_loss': -28730.553318359376, 'time_step': 0.025660314559936524, 'td_error': 1.147625119988876e+16, 'value_scale': 504292027.3394803, 'discounted_advantage': -689901387.2068554, 'initial_state': 614088448.0, 'diff_eval': 102953.95503538872} step=145000
2025-12-06 02:43.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_145000.d3


Epoch 146/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.59it/s, critic_loss=2.97e+16, actor_loss=-5.39e+8, temp=3.66e+6, temp_loss=-3.06e+4]


2025-12-06 02:43.33 [info     ] SAC_20251206013150: epoch=146 step=146000 epoch=146 metrics={'time_sample_batch': 0.0048804931640625, 'time_algorithm_update': 0.02029955053329468, 'critic_loss': 2.971180040860153e+16, 'actor_loss': -539436294.592, 'temp': 3664613.52825, 'temp_loss': -30394.497490234375, 'time_step': 0.02543716025352478, 'td_error': 1.2181064506490492e+16, 'value_scale': 519441295.49036044, 'discounted_advantage': -712142133.6787903, 'initial_state': 632943040.0, 'diff_eval': 102469.27006428} step=146000
2025-12-06 02:43.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_146000.d3


Epoch 147/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.49it/s, critic_loss=3.18e+16, actor_loss=-5.55e+8, temp=3.79e+6, temp_loss=-3.34e+4]


2025-12-06 02:44.02 [info     ] SAC_20251206013150: epoch=147 step=147000 epoch=147 metrics={'time_sample_batch': 0.00474390459060669, 'time_algorithm_update': 0.019900450944900512, 'critic_loss': 3.1771004194992948e+16, 'actor_loss': -555421539.392, 'temp': 3790285.503, 'temp_loss': -33183.928873046876, 'time_step': 0.02491289710998535, 'td_error': 1.2967951988606206e+16, 'value_scale': 535396917.59262365, 'discounted_advantage': -738001530.8938339, 'initial_state': 652881600.0, 'diff_eval': 102833.68961007717} step=147000
2025-12-06 02:44.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_147000.d3


Epoch 148/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.76it/s, critic_loss=3.36e+16, actor_loss=-5.72e+8, temp=3.92e+6, temp_loss=-3.33e+4]


2025-12-06 02:44.31 [info     ] SAC_20251206013150: epoch=148 step=148000 epoch=148 metrics={'time_sample_batch': 0.004848227500915527, 'time_algorithm_update': 0.020155824184417723, 'critic_loss': 3.362928251785498e+16, 'actor_loss': -572319590.272, 'temp': 3923826.87375, 'temp_loss': -33404.22998632812, 'time_step': 0.02530109119415283, 'td_error': 1.3763870567893438e+16, 'value_scale': 551711804.6001676, 'discounted_advantage': -758863978.1381402, 'initial_state': 673287104.0, 'diff_eval': 102548.85123446761} step=148000
2025-12-06 02:44.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_148000.d3


Epoch 149/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.13it/s, critic_loss=3.58e+16, actor_loss=-5.9e+8, temp=4.05e+6, temp_loss=-2.93e+4]


2025-12-06 02:45.00 [info     ] SAC_20251206013150: epoch=149 step=149000 epoch=149 metrics={'time_sample_batch': 0.004817214012145996, 'time_algorithm_update': 0.020010135412216187, 'critic_loss': 3.5801809728242188e+16, 'actor_loss': -589791282.24, 'temp': 4048942.77925, 'temp_loss': -30761.12468359375, 'time_step': 0.02508337950706482, 'td_error': 1.4624253749252526e+16, 'value_scale': 568485761.9178542, 'discounted_advantage': -782965127.3442837, 'initial_state': 694115200.0, 'diff_eval': 102963.52926099111} step=149000
2025-12-06 02:45.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_149000.d3


Epoch 150/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.95it/s, critic_loss=3.76e+16, actor_loss=-6.08e+8, temp=4.18e+6, temp_loss=-3.05e+4]


2025-12-06 02:45.30 [info     ] SAC_20251206013150: epoch=150 step=150000 epoch=150 metrics={'time_sample_batch': 0.00482687783241272, 'time_algorithm_update': 0.02011446523666382, 'critic_loss': 3.763273771439568e+16, 'actor_loss': -607809906.368, 'temp': 4179766.4645, 'temp_loss': -31053.064236328126, 'time_step': 0.025209465980529786, 'td_error': 1.5504595875335402e+16, 'value_scale': 585800958.5984912, 'discounted_advantage': -801967448.0903959, 'initial_state': 715800768.0, 'diff_eval': 103068.50719345265} step=150000
2025-12-06 02:45.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_150000.d3


Epoch 151/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.81it/s, critic_loss=4e+16, actor_loss=-6.26e+8, temp=4.3e+6, temp_loss=-3.35e+4]   


2025-12-06 02:45.59 [info     ] SAC_20251206013150: epoch=151 step=151000 epoch=151 metrics={'time_sample_batch': 0.004876883506774903, 'time_algorithm_update': 0.02017117476463318, 'critic_loss': 4.003328176104145e+16, 'actor_loss': -625703269.056, 'temp': 4300308.4885, 'temp_loss': -33519.69478320312, 'time_step': 0.02530673050880432, 'td_error': 1.6424086838054046e+16, 'value_scale': 602815463.4031852, 'discounted_advantage': -825593728.0031713, 'initial_state': 736971968.0, 'diff_eval': 102884.31504705385} step=151000
2025-12-06 02:45.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_151000.d3


Epoch 152/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.53it/s, critic_loss=4.28e+16, actor_loss=-6.43e+8, temp=4.43e+6, temp_loss=-3.82e+4]


2025-12-06 02:46.28 [info     ] SAC_20251206013150: epoch=152 step=152000 epoch=152 metrics={'time_sample_batch': 0.004865933656692505, 'time_algorithm_update': 0.020342172861099243, 'critic_loss': 4.283451172057199e+16, 'actor_loss': -643526387.648, 'temp': 4435270.2465, 'temp_loss': -38027.20093359375, 'time_step': 0.025484457969665527, 'td_error': 1.741509199069919e+16, 'value_scale': 620123726.0888517, 'discounted_advantage': -854555538.4285309, 'initial_state': 758569728.0, 'diff_eval': 102908.24743625605} step=152000
2025-12-06 02:46.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_152000.d3


Epoch 153/200: 100%|██████████| 1000/1000 [00:26<00:00, 38.37it/s, critic_loss=4.51e+16, actor_loss=-6.62e+8, temp=4.58e+6, temp_loss=-3.67e+4]


2025-12-06 02:46.58 [info     ] SAC_20251206013150: epoch=153 step=153000 epoch=153 metrics={'time_sample_batch': 0.004803743124008179, 'time_algorithm_update': 0.020558327674865724, 'critic_loss': 4.510470009743265e+16, 'actor_loss': -662097142.464, 'temp': 4582066.3915, 'temp_loss': -36534.994822265624, 'time_step': 0.025623079538345337, 'td_error': 1.843915351507626e+16, 'value_scale': 638223006.6856664, 'discounted_advantage': -877958116.4599955, 'initial_state': 781248576.0, 'diff_eval': 102323.46502804857} step=153000
2025-12-06 02:46.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_153000.d3


Epoch 154/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.88it/s, critic_loss=4.77e+16, actor_loss=-6.81e+8, temp=4.72e+6, temp_loss=-3.31e+4]


2025-12-06 02:47.27 [info     ] SAC_20251206013150: epoch=154 step=154000 epoch=154 metrics={'time_sample_batch': 0.0048425452709198, 'time_algorithm_update': 0.02013377380371094, 'critic_loss': 4.769717818806292e+16, 'actor_loss': -681086547.776, 'temp': 4724811.8845, 'temp_loss': -34050.516703125, 'time_step': 0.025259661197662354, 'td_error': 1.9492756411340216e+16, 'value_scale': 656544445.1433362, 'discounted_advantage': -898690250.3423783, 'initial_state': 804176768.0, 'diff_eval': 102619.77822513372} step=154000
2025-12-06 02:47.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_154000.d3


Epoch 155/200: 100%|██████████| 1000/1000 [00:26<00:00, 37.47it/s, critic_loss=5.06e+16, actor_loss=-7e+8, temp=4.86e+6, temp_loss=-3.78e+4]  


2025-12-06 02:47.57 [info     ] SAC_20251206013150: epoch=155 step=155000 epoch=155 metrics={'time_sample_batch': 0.004706220626831055, 'time_algorithm_update': 0.021321885108947752, 'critic_loss': 5.060610199367189e+16, 'actor_loss': -700296362.048, 'temp': 4858478.2315, 'temp_loss': -37592.60362890625, 'time_step': 0.026285902738571166, 'td_error': 2.0611934789549988e+16, 'value_scale': 674733089.2472758, 'discounted_advantage': -927351245.93914, 'initial_state': 826783360.0, 'diff_eval': 103046.11921076164} step=155000
2025-12-06 02:47.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_155000.d3


Epoch 156/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.79it/s, critic_loss=5.32e+16, actor_loss=-7.2e+8, temp=5e+6, temp_loss=-3.5e+4]    


2025-12-06 02:48.26 [info     ] SAC_20251206013150: epoch=156 step=156000 epoch=156 metrics={'time_sample_batch': 0.004724675416946411, 'time_algorithm_update': 0.01974379301071167, 'critic_loss': 5.320458127674861e+16, 'actor_loss': -719636231.104, 'temp': 5002531.3465, 'temp_loss': -34580.944203125, 'time_step': 0.024707539319992065, 'td_error': 2.1771027313515668e+16, 'value_scale': 693408065.8306789, 'discounted_advantage': -953307137.0479318, 'initial_state': 850195840.0, 'diff_eval': 102812.1344063244} step=156000
2025-12-06 02:48.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_156000.d3


Epoch 157/200: 100%|██████████| 1000/1000 [00:27<00:00, 36.21it/s, critic_loss=5.66e+16, actor_loss=-7.39e+8, temp=5.15e+6, temp_loss=-4.15e+4]


2025-12-06 02:48.57 [info     ] SAC_20251206013150: epoch=157 step=157000 epoch=157 metrics={'time_sample_batch': 0.00512957501411438, 'time_algorithm_update': 0.02169067668914795, 'critic_loss': 5.6670387078065816e+16, 'actor_loss': -739140999.744, 'temp': 5155332.622, 'temp_loss': -42574.48143945313, 'time_step': 0.027110258102416993, 'td_error': 2.2984404166104436e+16, 'value_scale': 712445346.8432523, 'discounted_advantage': -980057235.7028668, 'initial_state': 874246912.0, 'diff_eval': 102317.68032311514} step=157000
2025-12-06 02:48.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_157000.d3


Epoch 158/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.88it/s, critic_loss=5.97e+16, actor_loss=-7.59e+8, temp=5.32e+6, temp_loss=-3.83e+4]


2025-12-06 02:49.29 [info     ] SAC_20251206013150: epoch=158 step=158000 epoch=158 metrics={'time_sample_batch': 0.005495126008987427, 'time_algorithm_update': 0.022327813148498535, 'critic_loss': 5.971891124722624e+16, 'actor_loss': -759562143.168, 'temp': 5317249.268, 'temp_loss': -38366.22890234375, 'time_step': 0.028118753910064697, 'td_error': 2.426277239766104e+16, 'value_scale': 731727853.7870914, 'discounted_advantage': -1009138365.8066515, 'initial_state': 898545728.0, 'diff_eval': 102396.0249682444} step=158000
2025-12-06 02:49.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_158000.d3


Epoch 159/200: 100%|██████████| 1000/1000 [00:26<00:00, 38.21it/s, critic_loss=6.29e+16, actor_loss=-7.8e+8, temp=5.48e+6, temp_loss=-3.9e+4] 


2025-12-06 02:50.00 [info     ] SAC_20251206013150: epoch=159 step=159000 epoch=159 metrics={'time_sample_batch': 0.00489104962348938, 'time_algorithm_update': 0.020547216415405273, 'critic_loss': 6.294446279711785e+16, 'actor_loss': -780489087.488, 'temp': 5476336.1665, 'temp_loss': -39420.09744140625, 'time_step': 0.025703289985656737, 'td_error': 2.5612718810542948e+16, 'value_scale': 751798617.4082146, 'discounted_advantage': -1036584487.261913, 'initial_state': 923727616.0, 'diff_eval': 102222.28047940737} step=159000
2025-12-06 02:50.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_159000.d3


Epoch 160/200: 100%|██████████| 1000/1000 [00:26<00:00, 38.08it/s, critic_loss=6.59e+16, actor_loss=-8.02e+8, temp=5.63e+6, temp_loss=-3.92e+4]


2025-12-06 02:50.29 [info     ] SAC_20251206013150: epoch=160 step=160000 epoch=160 metrics={'time_sample_batch': 0.005090700626373291, 'time_algorithm_update': 0.020382083654403686, 'critic_loss': 6.594592233883907e+16, 'actor_loss': -802566699.52, 'temp': 5633821.0085, 'temp_loss': -41251.59975, 'time_step': 0.025760456562042237, 'td_error': 2.704376104990136e+16, 'value_scale': 772589885.0896899, 'discounted_advantage': -1063815530.6917175, 'initial_state': 950018624.0, 'diff_eval': 102694.30063778636} step=160000
2025-12-06 02:50.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_160000.d3


Epoch 161/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.94it/s, critic_loss=7.04e+16, actor_loss=-8.23e+8, temp=5.8e+6, temp_loss=-4.2e+4] 


2025-12-06 02:50.59 [info     ] SAC_20251206013150: epoch=161 step=161000 epoch=161 metrics={'time_sample_batch': 0.004875905752182007, 'time_algorithm_update': 0.02005339217185974, 'critic_loss': 7.042738533325131e+16, 'actor_loss': -823121580.544, 'temp': 5802876.6115, 'temp_loss': -41327.02703515625, 'time_step': 0.025209226369857788, 'td_error': 2.853090785621896e+16, 'value_scale': 793081071.3025985, 'discounted_advantage': -1095009977.5916932, 'initial_state': 975700288.0, 'diff_eval': 102887.8702020817} step=161000
2025-12-06 02:50.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_161000.d3


Epoch 162/200: 100%|██████████| 1000/1000 [00:27<00:00, 36.92it/s, critic_loss=7.39e+16, actor_loss=-8.45e+8, temp=5.97e+6, temp_loss=-4.28e+4]


2025-12-06 02:51.29 [info     ] SAC_20251206013150: epoch=162 step=162000 epoch=162 metrics={'time_sample_batch': 0.00515748405456543, 'time_algorithm_update': 0.02107057523727417, 'critic_loss': 7.39397156763127e+16, 'actor_loss': -845229514.304, 'temp': 5972342.1805, 'temp_loss': -43658.7039453125, 'time_step': 0.026530264854431152, 'td_error': 3.006365623860062e+16, 'value_scale': 814334471.7317686, 'discounted_advantage': -1121776026.0124512, 'initial_state': 1002389248.0, 'diff_eval': 102636.32380851137} step=162000
2025-12-06 02:51.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_162000.d3


Epoch 163/200: 100%|██████████| 1000/1000 [00:26<00:00, 38.40it/s, critic_loss=7.77e+16, actor_loss=-8.67e+8, temp=6.14e+6, temp_loss=-4.66e+4]


2025-12-06 02:51.59 [info     ] SAC_20251206013150: epoch=163 step=163000 epoch=163 metrics={'time_sample_batch': 0.004945036411285401, 'time_algorithm_update': 0.020325151920318602, 'critic_loss': 7.77640088343203e+16, 'actor_loss': -867171045.184, 'temp': 6144110.709, 'temp_loss': -48719.6149765625, 'time_step': 0.025542476415634154, 'td_error': 3.164600916489456e+16, 'value_scale': 835557460.9488684, 'discounted_advantage': -1150517057.0301116, 'initial_state': 1029024128.0, 'diff_eval': 101824.1808837917} step=163000
2025-12-06 02:51.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_163000.d3


Epoch 164/200: 100%|██████████| 1000/1000 [00:26<00:00, 37.43it/s, critic_loss=8.21e+16, actor_loss=-8.9e+8, temp=6.33e+6, temp_loss=-4.66e+4]


2025-12-06 02:52.29 [info     ] SAC_20251206013150: epoch=164 step=164000 epoch=164 metrics={'time_sample_batch': 0.005099201202392578, 'time_algorithm_update': 0.02082361555099487, 'critic_loss': 8.205665042700435e+16, 'actor_loss': -889994053.76, 'temp': 6327174.9225, 'temp_loss': -45339.5599609375, 'time_step': 0.02620847201347351, 'td_error': 3.3362318121175916e+16, 'value_scale': 857690839.3763621, 'discounted_advantage': -1181837467.1421802, 'initial_state': 1056850432.0, 'diff_eval': 102316.58657544816} step=164000
2025-12-06 02:52.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_164000.d3


Epoch 165/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.49it/s, critic_loss=8.61e+16, actor_loss=-9.13e+8, temp=6.51e+6, temp_loss=-4.53e+4]


2025-12-06 02:52.59 [info     ] SAC_20251206013150: epoch=165 step=165000 epoch=165 metrics={'time_sample_batch': 0.004917158842086792, 'time_algorithm_update': 0.02030390524864197, 'critic_loss': 8.608493558432373e+16, 'actor_loss': -913585051.904, 'temp': 6508964.9375, 'temp_loss': -47347.6123359375, 'time_step': 0.02549976396560669, 'td_error': 3.514878990930551e+16, 'value_scale': 880232551.8591785, 'discounted_advantage': -1212810519.800362, 'initial_state': 1085080448.0, 'diff_eval': 102666.14007529868} step=165000
2025-12-06 02:52.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_165000.d3


Epoch 166/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.24it/s, critic_loss=9.1e+16, actor_loss=-9.37e+8, temp=6.68e+6, temp_loss=-4.94e+4]


2025-12-06 02:53.28 [info     ] SAC_20251206013150: epoch=166 step=166000 epoch=166 metrics={'time_sample_batch': 0.00480815315246582, 'time_algorithm_update': 0.01992210268974304, 'critic_loss': 9.108393699038566e+16, 'actor_loss': -937398262.144, 'temp': 6685676.661, 'temp_loss': -49187.43227734375, 'time_step': 0.02499517512321472, 'td_error': 3.700026939904137e+16, 'value_scale': 902948114.4677284, 'discounted_advantage': -1245795680.1597896, 'initial_state': 1113640192.0, 'diff_eval': 102263.60171404129} step=166000
2025-12-06 02:53.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_166000.d3


Epoch 167/200: 100%|██████████| 1000/1000 [00:26<00:00, 38.11it/s, critic_loss=9.58e+16, actor_loss=-9.61e+8, temp=6.89e+6, temp_loss=-4.82e+4]


2025-12-06 02:53.57 [info     ] SAC_20251206013150: epoch=167 step=167000 epoch=167 metrics={'time_sample_batch': 0.005013300895690918, 'time_algorithm_update': 0.02044153928756714, 'critic_loss': 9.579653473001642e+16, 'actor_loss': -961563464.0, 'temp': 6886233.838, 'temp_loss': -48545.7312890625, 'time_step': 0.02571787738800049, 'td_error': 3.902539427788397e+16, 'value_scale': 926719445.7937971, 'discounted_advantage': -1282488682.2855496, 'initial_state': 1143567104.0, 'diff_eval': 102912.07651075673} step=167000
2025-12-06 02:53.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_167000.d3


Epoch 168/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.50it/s, critic_loss=1.01e+17, actor_loss=-9.86e+8, temp=7.08e+6, temp_loss=-5.01e+4]


2025-12-06 02:54.27 [info     ] SAC_20251206013150: epoch=168 step=168000 epoch=168 metrics={'time_sample_batch': 0.004943299770355225, 'time_algorithm_update': 0.020296799421310425, 'critic_loss': 1.00790741509467e+17, 'actor_loss': -986457999.36, 'temp': 7077445.4235, 'temp_loss': -52478.34013671875, 'time_step': 0.02550924825668335, 'td_error': 4.100760602811814e+16, 'value_scale': 950588518.3168483, 'discounted_advantage': -1310119334.2714472, 'initial_state': 1173659392.0, 'diff_eval': 102330.85053307992} step=168000
2025-12-06 02:54.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_168000.d3


Epoch 169/200: 100%|██████████| 1000/1000 [00:27<00:00, 35.90it/s, critic_loss=1.06e+17, actor_loss=-1.01e+9, temp=7.27e+6, temp_loss=-4.56e+4]


2025-12-06 02:54.58 [info     ] SAC_20251206013150: epoch=169 step=169000 epoch=169 metrics={'time_sample_batch': 0.006208526849746704, 'time_algorithm_update': 0.02078861403465271, 'critic_loss': 1.0578179140635066e+17, 'actor_loss': -1011954736.512, 'temp': 7268313.4865, 'temp_loss': -45046.0393359375, 'time_step': 0.027294816732406618, 'td_error': 4.316073296038611e+16, 'value_scale': 974925887.879296, 'discounted_advantage': -1344933355.0428824, 'initial_state': 1204269440.0, 'diff_eval': 102622.69333674821} step=169000
2025-12-06 02:54.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_169000.d3


Epoch 170/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.64it/s, critic_loss=1.11e+17, actor_loss=-1.04e+9, temp=7.47e+6, temp_loss=-5.29e+4]


2025-12-06 02:55.28 [info     ] SAC_20251206013150: epoch=170 step=170000 epoch=170 metrics={'time_sample_batch': 0.0048946506977081295, 'time_algorithm_update': 0.02024326467514038, 'critic_loss': 1.1061327947143683e+17, 'actor_loss': -1037524386.688, 'temp': 7471440.7845, 'temp_loss': -51566.11344921875, 'time_step': 0.025411045551300047, 'td_error': 4.538396994765695e+16, 'value_scale': 1000202775.255658, 'discounted_advantage': -1373623679.1586647, 'initial_state': 1236120832.0, 'diff_eval': 102767.24272877054} step=170000
2025-12-06 02:55.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_170000.d3


Epoch 171/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.01it/s, critic_loss=1.16e+17, actor_loss=-1.06e+9, temp=7.66e+6, temp_loss=-5.22e+4]


2025-12-06 02:55.57 [info     ] SAC_20251206013150: epoch=171 step=171000 epoch=171 metrics={'time_sample_batch': 0.004856992959976197, 'time_algorithm_update': 0.02008369183540344, 'critic_loss': 1.1645513911952526e+17, 'actor_loss': -1064083295.744, 'temp': 7663841.744, 'temp_loss': -51535.38579296875, 'time_step': 0.025196738004684447, 'td_error': 4.765734046126364e+16, 'value_scale': 1024474818.5481979, 'discounted_advantage': -1411468379.1186643, 'initial_state': 1266608000.0, 'diff_eval': 102438.58297743041} step=171000
2025-12-06 02:55.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_171000.d3


Epoch 172/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.83it/s, critic_loss=1.23e+17, actor_loss=-1.09e+9, temp=7.87e+6, temp_loss=-5.21e+4]


2025-12-06 02:56.26 [info     ] SAC_20251206013150: epoch=172 step=172000 epoch=172 metrics={'time_sample_batch': 0.004842993021011353, 'time_algorithm_update': 0.020177833080291748, 'critic_loss': 1.2301707822695344e+17, 'actor_loss': -1089926473.664, 'temp': 7874268.388, 'temp_loss': -52851.24457421875, 'time_step': 0.025284622192382814, 'td_error': 5.0082893824076456e+16, 'value_scale': 1049889878.6119027, 'discounted_advantage': -1449919760.0175962, 'initial_state': 1298693120.0, 'diff_eval': 102528.1032003529} step=172000
2025-12-06 02:56.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_172000.d3


Epoch 173/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.22it/s, critic_loss=1.29e+17, actor_loss=-1.12e+9, temp=8.1e+6, temp_loss=-5.14e+4]


2025-12-06 02:56.55 [info     ] SAC_20251206013150: epoch=173 step=173000 epoch=173 metrics={'time_sample_batch': 0.004826099395751953, 'time_algorithm_update': 0.019902307748794555, 'critic_loss': 1.2877636126185094e+17, 'actor_loss': -1117258687.744, 'temp': 8098613.12, 'temp_loss': -48454.63428515625, 'time_step': 0.0250151047706604, 'td_error': 5.268818980668692e+16, 'value_scale': 1076844843.5473597, 'discounted_advantage': -1484088712.6646008, 'initial_state': 1332635264.0, 'diff_eval': 102889.479508615} step=173000
2025-12-06 02:56.55 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_173000.d3


Epoch 174/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.99it/s, critic_loss=1.35e+17, actor_loss=-1.15e+9, temp=8.31e+6, temp_loss=-6.34e+4]


2025-12-06 02:57.24 [info     ] SAC_20251206013150: epoch=174 step=174000 epoch=174 metrics={'time_sample_batch': 0.004806011199951172, 'time_algorithm_update': 0.02011552119255066, 'critic_loss': 1.3521851869208091e+17, 'actor_loss': -1145517977.344, 'temp': 8308426.1745, 'temp_loss': -61182.224625, 'time_step': 0.025185512065887452, 'td_error': 5.5264369310577336e+16, 'value_scale': 1102745204.6135793, 'discounted_advantage': -1521277242.4030821, 'initial_state': 1365355776.0, 'diff_eval': 102648.21166886587} step=174000
2025-12-06 02:57.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_174000.d3


Epoch 175/200: 100%|██████████| 1000/1000 [00:25<00:00, 38.50it/s, critic_loss=1.42e+17, actor_loss=-1.17e+9, temp=8.52e+6, temp_loss=-5.24e+4]


2025-12-06 02:57.54 [info     ] SAC_20251206013150: epoch=175 step=175000 epoch=175 metrics={'time_sample_batch': 0.004749624967575073, 'time_algorithm_update': 0.020461743354797363, 'critic_loss': 1.4165216232889909e+17, 'actor_loss': -1172629402.112, 'temp': 8522059.141, 'temp_loss': -51187.4272890625, 'time_step': 0.025491786003112794, 'td_error': 5.801732166042532e+16, 'value_scale': 1129890226.0117352, 'discounted_advantage': -1557397693.0692246, 'initial_state': 1399372544.0, 'diff_eval': 102802.39065672306} step=175000
2025-12-06 02:57.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_175000.d3


Epoch 176/200: 100%|██████████| 1000/1000 [00:27<00:00, 36.66it/s, critic_loss=1.47e+17, actor_loss=-1.2e+9, temp=8.75e+6, temp_loss=-5.57e+4]


2025-12-06 02:58.25 [info     ] SAC_20251206013150: epoch=176 step=176000 epoch=176 metrics={'time_sample_batch': 0.004812958240509033, 'time_algorithm_update': 0.021691831827163697, 'critic_loss': 1.4704115254216883e+17, 'actor_loss': -1202367811.328, 'temp': 8746743.815, 'temp_loss': -51148.5606953125, 'time_step': 0.026807677030563356, 'td_error': 6.089797653764443e+16, 'value_scale': 1158359903.812238, 'discounted_advantage': -1588917434.6276586, 'initial_state': 1435508096.0, 'diff_eval': 102762.623794652} step=176000
2025-12-06 02:58.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_176000.d3


Epoch 177/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.20it/s, critic_loss=1.56e+17, actor_loss=-1.23e+9, temp=8.96e+6, temp_loss=-6.73e+4]


2025-12-06 02:58.54 [info     ] SAC_20251206013150: epoch=177 step=177000 epoch=177 metrics={'time_sample_batch': 0.004820297241210938, 'time_algorithm_update': 0.019958582639694213, 'critic_loss': 1.5649408102039763e+17, 'actor_loss': -1231310861.568, 'temp': 8965251.299, 'temp_loss': -70848.77521484374, 'time_step': 0.025055288076400756, 'td_error': 6.390340857483693e+16, 'value_scale': 1186047971.5540655, 'discounted_advantage': -1630938126.635579, 'initial_state': 1470263168.0, 'diff_eval': 102460.85471163252} step=177000
2025-12-06 02:58.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_177000.d3


Epoch 178/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.29it/s, critic_loss=1.64e+17, actor_loss=-1.26e+9, temp=9.22e+6, temp_loss=-5.55e+4]


2025-12-06 02:59.23 [info     ] SAC_20251206013150: epoch=178 step=178000 epoch=178 metrics={'time_sample_batch': 0.004777090787887573, 'time_algorithm_update': 0.019946616411209105, 'critic_loss': 1.645854777989105e+17, 'actor_loss': -1260439767.68, 'temp': 9217136.566, 'temp_loss': -56492.3294140625, 'time_step': 0.02501503896713257, 'td_error': 6.709703419685725e+16, 'value_scale': 1214951549.827326, 'discounted_advantage': -1672410138.99967, 'initial_state': 1506995840.0, 'diff_eval': 102795.67180542534} step=178000
2025-12-06 02:59.23 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_178000.d3


Epoch 179/200: 100%|██████████| 1000/1000 [00:25<00:00, 39.06it/s, critic_loss=1.72e+17, actor_loss=-1.29e+9, temp=9.45e+6, temp_loss=-6e+4]  


2025-12-06 02:59.52 [info     ] SAC_20251206013150: epoch=179 step=179000 epoch=179 metrics={'time_sample_batch': 0.004807056903839112, 'time_algorithm_update': 0.020084080934524535, 'critic_loss': 1.7208927278303334e+17, 'actor_loss': -1290927319.168, 'temp': 9449354.231, 'temp_loss': -61410.7208203125, 'time_step': 0.025149171113967896, 'td_error': 7.022648968707788e+16, 'value_scale': 1243276404.1441743, 'discounted_advantage': -1708760338.3818736, 'initial_state': 1542990208.0, 'diff_eval': 102385.30979533248} step=179000
2025-12-06 02:59.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_179000.d3


Epoch 180/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.73it/s, critic_loss=1.8e+17, actor_loss=-1.32e+9, temp=9.69e+6, temp_loss=-5.91e+4]


2025-12-06 03:00.25 [info     ] SAC_20251206013150: epoch=180 step=180000 epoch=180 metrics={'time_sample_batch': 0.005346275806427002, 'time_algorithm_update': 0.022630196332931517, 'critic_loss': 1.7989866909558688e+17, 'actor_loss': -1321268277.504, 'temp': 9686934.202, 'temp_loss': -60181.05651171875, 'time_step': 0.028259671449661256, 'td_error': 7.366904893788373e+16, 'value_scale': 1273087557.270746, 'discounted_advantage': -1749750832.95872, 'initial_state': 1580896128.0, 'diff_eval': 102856.16797826716} step=180000
2025-12-06 03:00.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_180000.d3


Epoch 181/200: 100%|██████████| 1000/1000 [00:28<00:00, 35.23it/s, critic_loss=1.88e+17, actor_loss=-1.35e+9, temp=9.94e+6, temp_loss=-6.04e+4]


2025-12-06 03:00.57 [info     ] SAC_20251206013150: epoch=181 step=181000 epoch=181 metrics={'time_sample_batch': 0.00543721055984497, 'time_algorithm_update': 0.022107533931732176, 'critic_loss': 1.8795575935037658e+17, 'actor_loss': -1352046680.064, 'temp': 9936270.414, 'temp_loss': -59365.5262890625, 'time_step': 0.02783389687538147, 'td_error': 7.711442677518502e+16, 'value_scale': 1302939803.2388935, 'discounted_advantage': -1785645362.5193994, 'initial_state': 1618773760.0, 'diff_eval': 102567.24550134958} step=181000
2025-12-06 03:00.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_181000.d3


Epoch 182/200: 100%|██████████| 1000/1000 [00:30<00:00, 33.15it/s, critic_loss=1.97e+17, actor_loss=-1.38e+9, temp=1.02e+7, temp_loss=-5.79e+4]


2025-12-06 03:01.32 [info     ] SAC_20251206013150: epoch=182 step=182000 epoch=182 metrics={'time_sample_batch': 0.005719836711883545, 'time_algorithm_update': 0.02355302381515503, 'critic_loss': 1.9718090696107123e+17, 'actor_loss': -1383464258.304, 'temp': 10162562.793, 'temp_loss': -61041.91978515625, 'time_step': 0.02959247636795044, 'td_error': 8.057039978055123e+16, 'value_scale': 1331326825.736798, 'discounted_advantage': -1828979205.4173183, 'initial_state': 1654473344.0, 'diff_eval': 102448.45132437536} step=182000
2025-12-06 03:01.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_182000.d3


Epoch 183/200: 100%|██████████| 1000/1000 [00:31<00:00, 32.15it/s, critic_loss=2.06e+17, actor_loss=-1.41e+9, temp=1.04e+7, temp_loss=-6.43e+4]


2025-12-06 03:02.07 [info     ] SAC_20251206013150: epoch=183 step=183000 epoch=183 metrics={'time_sample_batch': 0.00590641450881958, 'time_algorithm_update': 0.024286909103393553, 'critic_loss': 2.056130517302908e+17, 'actor_loss': -1414885428.608, 'temp': 10425763.993, 'temp_loss': -66549.22273828126, 'time_step': 0.03051151204109192, 'td_error': 8.441831413125781e+16, 'value_scale': 1362839586.8298407, 'discounted_advantage': -1870090492.7997305, 'initial_state': 1694534144.0, 'diff_eval': 102396.27931140455} step=183000
2025-12-06 03:02.07 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_183000.d3


Epoch 184/200: 100%|██████████| 1000/1000 [00:30<00:00, 32.43it/s, critic_loss=2.17e+17, actor_loss=-1.45e+9, temp=1.07e+7, temp_loss=-6.41e+4]


2025-12-06 03:02.42 [info     ] SAC_20251206013150: epoch=184 step=184000 epoch=184 metrics={'time_sample_batch': 0.005878588199615478, 'time_algorithm_update': 0.024055270671844483, 'critic_loss': 2.173849123195464e+17, 'actor_loss': -1447317903.488, 'temp': 10699019.054, 'temp_loss': -62517.1416171875, 'time_step': 0.03024268126487732, 'td_error': 8.847638505822429e+16, 'value_scale': 1393921609.1601007, 'discounted_advantage': -1923785738.8321693, 'initial_state': 1733816192.0, 'diff_eval': 103090.6475873637} step=184000
2025-12-06 03:02.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_184000.d3


Epoch 185/200: 100%|██████████| 1000/1000 [00:31<00:00, 31.36it/s, critic_loss=2.26e+17, actor_loss=-1.48e+9, temp=1.1e+7, temp_loss=-6.6e+4] 


2025-12-06 03:03.18 [info     ] SAC_20251206013150: epoch=185 step=185000 epoch=185 metrics={'time_sample_batch': 0.006082546472549438, 'time_algorithm_update': 0.02485967755317688, 'critic_loss': 2.2631525510162048e+17, 'actor_loss': -1479977419.008, 'temp': 10973131.533, 'temp_loss': -64032.31119140625, 'time_step': 0.031264548301696775, 'td_error': 9.236850683641565e+16, 'value_scale': 1424778457.8172674, 'discounted_advantage': -1961294484.0604122, 'initial_state': 1772805504.0, 'diff_eval': 102814.45158849237} step=185000
2025-12-06 03:03.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_185000.d3


Epoch 186/200: 100%|██████████| 1000/1000 [00:30<00:00, 33.22it/s, critic_loss=2.36e+17, actor_loss=-1.51e+9, temp=1.12e+7, temp_loss=-6.95e+4]


2025-12-06 03:03.52 [info     ] SAC_20251206013150: epoch=186 step=186000 epoch=186 metrics={'time_sample_batch': 0.00575459098815918, 'time_algorithm_update': 0.023459614515304567, 'critic_loss': 2.365093871486396e+17, 'actor_loss': -1512280232.32, 'temp': 11223315.005, 'temp_loss': -67247.5549453125, 'time_step': 0.029521276473999023, 'td_error': 9.644087626952243e+16, 'value_scale': 1455835159.658005, 'discounted_advantage': -2004230070.2236238, 'initial_state': 1812198912.0, 'diff_eval': 102368.18458436341} step=186000
2025-12-06 03:03.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_186000.d3


Epoch 187/200: 100%|██████████| 1000/1000 [00:30<00:00, 33.31it/s, critic_loss=2.44e+17, actor_loss=-1.55e+9, temp=1.15e+7, temp_loss=-6.52e+4]


2025-12-06 03:04.26 [info     ] SAC_20251206013150: epoch=187 step=187000 epoch=187 metrics={'time_sample_batch': 0.0058355779647827144, 'time_algorithm_update': 0.02333721160888672, 'critic_loss': 2.445647310611368e+17, 'actor_loss': -1546647835.136, 'temp': 11485000.667, 'temp_loss': -62142.67471875, 'time_step': 0.02947637963294983, 'td_error': 1.0084958750418834e+17, 'value_scale': 1489393555.84912, 'discounted_advantage': -2041697841.6734302, 'initial_state': 1854839552.0, 'diff_eval': 102386.36761494275} step=187000
2025-12-06 03:04.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_187000.d3


Epoch 188/200: 100%|██████████| 1000/1000 [00:30<00:00, 32.63it/s, critic_loss=2.58e+17, actor_loss=-1.58e+9, temp=1.17e+7, temp_loss=-6.8e+4]


2025-12-06 03:05.00 [info     ] SAC_20251206013150: epoch=188 step=188000 epoch=188 metrics={'time_sample_batch': 0.0059254965782165525, 'time_algorithm_update': 0.0238034987449646, 'critic_loss': 2.575848617210282e+17, 'actor_loss': -1580922986.24, 'temp': 11732548.276, 'temp_loss': -70022.3648359375, 'time_step': 0.03005190181732178, 'td_error': 1.0540834336033683e+17, 'value_scale': 1522011033.24057, 'discounted_advantage': -2090070311.869667, 'initial_state': 1896082688.0, 'diff_eval': 102654.51498807425} step=188000
2025-12-06 03:05.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_188000.d3


Epoch 189/200: 100%|██████████| 1000/1000 [00:27<00:00, 36.81it/s, critic_loss=2.68e+17, actor_loss=-1.62e+9, temp=1.2e+7, temp_loss=-7.47e+4]


2025-12-06 03:05.31 [info     ] SAC_20251206013150: epoch=189 step=189000 epoch=189 metrics={'time_sample_batch': 0.005202947616577148, 'time_algorithm_update': 0.021166324377059936, 'critic_loss': 2.6813595932333546e+17, 'actor_loss': -1616371310.592, 'temp': 12015943.168, 'temp_loss': -74434.5914296875, 'time_step': 0.026630119800567627, 'td_error': 1.1013734266470246e+17, 'value_scale': 1555878169.361274, 'discounted_advantage': -2134663116.120272, 'initial_state': 1939057024.0, 'diff_eval': 102555.83767767064} step=189000
2025-12-06 03:05.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_189000.d3


Epoch 190/200: 100%|██████████| 1000/1000 [00:28<00:00, 35.65it/s, critic_loss=2.83e+17, actor_loss=-1.65e+9, temp=1.23e+7, temp_loss=-7.11e+4]


2025-12-06 03:06.03 [info     ] SAC_20251206013150: epoch=190 step=190000 epoch=190 metrics={'time_sample_batch': 0.00536722731590271, 'time_algorithm_update': 0.02184323740005493, 'critic_loss': 2.8276729115551622e+17, 'actor_loss': -1649795495.936, 'temp': 12304818.936, 'temp_loss': -69302.8716328125, 'time_step': 0.027500038385391234, 'td_error': 1.1512161190726718e+17, 'value_scale': 1590260437.6194468, 'discounted_advantage': -2183281351.4560485, 'initial_state': 1982477312.0, 'diff_eval': 102714.7332273417} step=190000
2025-12-06 03:06.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_190000.d3


Epoch 191/200: 100%|██████████| 1000/1000 [00:28<00:00, 35.04it/s, critic_loss=2.97e+17, actor_loss=-1.69e+9, temp=1.26e+7, temp_loss=-8.22e+4]


2025-12-06 03:06.35 [info     ] SAC_20251206013150: epoch=191 step=191000 epoch=191 metrics={'time_sample_batch': 0.005498722791671753, 'time_algorithm_update': 0.02220367670059204, 'critic_loss': 2.972473016347724e+17, 'actor_loss': -1685951845.76, 'temp': 12601602.406, 'temp_loss': -80447.137515625, 'time_step': 0.02799621629714966, 'td_error': 1.1999415216777056e+17, 'value_scale': 1622944441.951383, 'discounted_advantage': -2236827610.5896544, 'initial_state': 2024361216.0, 'diff_eval': 102072.53807987571} step=191000
2025-12-06 03:06.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_191000.d3


Epoch 192/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.30it/s, critic_loss=3.06e+17, actor_loss=-1.72e+9, temp=1.29e+7, temp_loss=-7.17e+4]


2025-12-06 03:07.08 [info     ] SAC_20251206013150: epoch=192 step=192000 epoch=192 metrics={'time_sample_batch': 0.005401576042175293, 'time_algorithm_update': 0.022937981843948366, 'critic_loss': 3.060098105209866e+17, 'actor_loss': -1723733752.96, 'temp': 12913216.578, 'temp_loss': -71290.1616953125, 'time_step': 0.0286282377243042, 'td_error': 1.2536473429935954e+17, 'value_scale': 1659587820.6068735, 'discounted_advantage': -2276400927.267634, 'initial_state': 2070958720.0, 'diff_eval': 102427.61477198085} step=192000
2025-12-06 03:07.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_192000.d3


Epoch 193/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.22it/s, critic_loss=3.23e+17, actor_loss=-1.76e+9, temp=1.32e+7, temp_loss=-8.22e+4]


2025-12-06 03:07.41 [info     ] SAC_20251206013150: epoch=193 step=193000 epoch=193 metrics={'time_sample_batch': 0.0058683700561523435, 'time_algorithm_update': 0.022568628311157225, 'critic_loss': 3.227295008113558e+17, 'actor_loss': -1759796139.264, 'temp': 13206456.719, 'temp_loss': -76962.18934375, 'time_step': 0.02871290683746338, 'td_error': 1.30982414584882e+17, 'value_scale': 1695231090.2129087, 'discounted_advantage': -2334877337.052319, 'initial_state': 2116109824.0, 'diff_eval': 102570.30917228578} step=193000
2025-12-06 03:07.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_193000.d3


Epoch 194/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.89it/s, critic_loss=3.37e+17, actor_loss=-1.8e+9, temp=1.35e+7, temp_loss=-9.11e+4]


2025-12-06 03:08.13 [info     ] SAC_20251206013150: epoch=194 step=194000 epoch=194 metrics={'time_sample_batch': 0.00549591326713562, 'time_algorithm_update': 0.022316509485244752, 'critic_loss': 3.3733503081648435e+17, 'actor_loss': -1798004657.152, 'temp': 13546254.015, 'temp_loss': -87841.60146875, 'time_step': 0.0281024534702301, 'td_error': 1.3675918837094733e+17, 'value_scale': 1731243671.0544846, 'discounted_advantage': -2392216519.4059267, 'initial_state': 2161882880.0, 'diff_eval': 102581.50482730914} step=194000
2025-12-06 03:08.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_194000.d3


Epoch 195/200: 100%|██████████| 1000/1000 [00:27<00:00, 36.47it/s, critic_loss=3.5e+17, actor_loss=-1.84e+9, temp=1.39e+7, temp_loss=-7.83e+4]


2025-12-06 03:08.44 [info     ] SAC_20251206013150: epoch=195 step=195000 epoch=195 metrics={'time_sample_batch': 0.0051742281913757324, 'time_algorithm_update': 0.02144634699821472, 'critic_loss': 3.500936208430452e+17, 'actor_loss': -1837039970.048, 'temp': 13870097.755, 'temp_loss': -73901.538265625, 'time_step': 0.0269026563167572, 'td_error': 1.4250105637006672e+17, 'value_scale': 1767989573.9145012, 'discounted_advantage': -2435419287.8438153, 'initial_state': 2208851456.0, 'diff_eval': 102034.40691478414} step=195000
2025-12-06 03:08.45 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_195000.d3


Epoch 196/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.86it/s, critic_loss=3.67e+17, actor_loss=-1.87e+9, temp=1.42e+7, temp_loss=-7.35e+4]


2025-12-06 03:09.18 [info     ] SAC_20251206013150: epoch=196 step=196000 epoch=196 metrics={'time_sample_batch': 0.006503336668014527, 'time_algorithm_update': 0.02138610339164734, 'critic_loss': 3.6733938619843923e+17, 'actor_loss': -1874598736.256, 'temp': 14181093.49, 'temp_loss': -76840.24028125, 'time_step': 0.028163142442703248, 'td_error': 1.4881588033575632e+17, 'value_scale': 1805388005.5389774, 'discounted_advantage': -2495656193.3636065, 'initial_state': 2255926784.0, 'diff_eval': 102904.44013462083} step=196000
2025-12-06 03:09.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_196000.d3


Epoch 197/200: 100%|██████████| 1000/1000 [00:28<00:00, 35.16it/s, critic_loss=3.82e+17, actor_loss=-1.91e+9, temp=1.45e+7, temp_loss=-9.06e+4]


2025-12-06 03:09.50 [info     ] SAC_20251206013150: epoch=197 step=197000 epoch=197 metrics={'time_sample_batch': 0.005434029340744019, 'time_algorithm_update': 0.02219984197616577, 'critic_loss': 3.821896226180925e+17, 'actor_loss': -1914841324.8, 'temp': 14517884.282, 'temp_loss': -94103.3795390625, 'time_step': 0.0279205482006073, 'td_error': 1.5503890391088563e+17, 'value_scale': 1843131207.5507126, 'discounted_advantage': -2544464876.357576, 'initial_state': 2304020224.0, 'diff_eval': 102210.5378769771} step=197000
2025-12-06 03:09.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_197000.d3


Epoch 198/200: 100%|██████████| 1000/1000 [00:28<00:00, 35.28it/s, critic_loss=4e+17, actor_loss=-1.95e+9, temp=1.49e+7, temp_loss=-7.88e+4]  


2025-12-06 03:10.22 [info     ] SAC_20251206013150: epoch=198 step=198000 epoch=198 metrics={'time_sample_batch': 0.005422150611877442, 'time_algorithm_update': 0.0221271014213562, 'critic_loss': 3.999149572700693e+17, 'actor_loss': -1954729800.32, 'temp': 14860761.338, 'temp_loss': -79840.8575625, 'time_step': 0.02781782364845276, 'td_error': 1.6178142990430176e+17, 'value_scale': 1881582490.8767812, 'discounted_advantage': -2606832012.0504384, 'initial_state': 2352975360.0, 'diff_eval': 102586.70706467018} step=198000
2025-12-06 03:10.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_198000.d3


Epoch 199/200: 100%|██████████| 1000/1000 [00:27<00:00, 35.93it/s, critic_loss=4.16e+17, actor_loss=-2e+9, temp=1.52e+7, temp_loss=-8.32e+4]  


2025-12-06 03:10.54 [info     ] SAC_20251206013150: epoch=199 step=199000 epoch=199 metrics={'time_sample_batch': 0.005303046941757202, 'time_algorithm_update': 0.02172215223312378, 'critic_loss': 4.1557942156378976e+17, 'actor_loss': -1995447878.144, 'temp': 15211175.655, 'temp_loss': -75560.92228125, 'time_step': 0.027299682855606078, 'td_error': 1.687767647129943e+17, 'value_scale': 1922421923.16513, 'discounted_advantage': -2654939992.6299086, 'initial_state': 2405080320.0, 'diff_eval': 102797.45850818335} step=199000
2025-12-06 03:10.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_199000.d3


Epoch 200/200: 100%|██████████| 1000/1000 [00:28<00:00, 35.71it/s, critic_loss=4.31e+17, actor_loss=-2.04e+9, temp=1.55e+7, temp_loss=-8.81e+4]


2025-12-06 03:11.25 [info     ] SAC_20251206013150: epoch=200 step=200000 epoch=200 metrics={'time_sample_batch': 0.005393885135650635, 'time_algorithm_update': 0.021787292957305908, 'critic_loss': 4.3096881270135296e+17, 'actor_loss': -2037087753.6, 'temp': 15540868.499, 'temp_loss': -78927.20040625, 'time_step': 0.027475628852844238, 'td_error': 1.7576851038882797e+17, 'value_scale': 1962481094.974015, 'discounted_advantage': -2702050936.403351, 'initial_state': 2456227584.0, 'diff_eval': 102659.51264716394} step=200000
2025-12-06 03:11.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\SAC_20251206013150\model_200000.d3
Training model:  BCQ
2025-12-06 03:11.26 [info     ] dataset info                   dataset_info=DatasetInfo(observation_signature=Signature(dtype=[dtype('float64')], shape=[(7,)]), action_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.CONTINUOUS: 1>

Epoch 1/200: 100%|██████████| 1000/1000 [00:31<00:00, 31.88it/s, vae_loss=0.0442, critic_loss=0.447, actor_loss=-1.53]


2025-12-06 03:12.03 [info     ] BCQ_20251206031126: epoch=1 step=1000 epoch=1 metrics={'time_sample_batch': 0.0023758995532989504, 'time_algorithm_update': 0.02816832447052002, 'vae_loss': 0.04416180654894561, 'critic_loss': 0.45352013379335404, 'actor_loss': -1.5344468662142754, 'time_step': 0.03082116746902466, 'td_error': 1.0399682556859051, 'value_scale': 3.235233563923551, 'discounted_advantage': -2.4132424148377867, 'initial_state': 3.0070056915283203, 'diff_eval': 2776.186905765942} step=1000
2025-12-06 03:12.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_1000.d3


Epoch 2/200: 100%|██████████| 1000/1000 [00:30<00:00, 32.52it/s, vae_loss=0.0371, critic_loss=1.97, actor_loss=-3.5]


2025-12-06 03:12.39 [info     ] BCQ_20251206031126: epoch=2 step=2000 epoch=2 metrics={'time_sample_batch': 0.0023032779693603514, 'time_algorithm_update': 0.027645480632781983, 'vae_loss': 0.03705825681425631, 'critic_loss': 1.976869007885456, 'actor_loss': -3.5095707677602768, 'time_step': 0.030220213174819946, 'td_error': 2.2414450643140085, 'value_scale': 5.61853492077618, 'discounted_advantage': -4.9232227418551515, 'initial_state': 4.17404317855835, 'diff_eval': 2160.783194852035} step=2000
2025-12-06 03:12.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_2000.d3


Epoch 3/200: 100%|██████████| 1000/1000 [00:31<00:00, 32.02it/s, vae_loss=0.032, critic_loss=4.69, actor_loss=-5.5] 


2025-12-06 03:13.16 [info     ] BCQ_20251206031126: epoch=3 step=3000 epoch=3 metrics={'time_sample_batch': 0.002333059549331665, 'time_algorithm_update': 0.028091968059539795, 'vae_loss': 0.032047261307947335, 'critic_loss': 4.7096439455747605, 'actor_loss': -5.5057153136730195, 'time_step': 0.03070011568069458, 'td_error': 3.418337589876402, 'value_scale': 7.802387677548392, 'discounted_advantage': -7.30133186107786, 'initial_state': 6.11273193359375, 'diff_eval': 1424.892537693365} step=3000
2025-12-06 03:13.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_3000.d3


Epoch 4/200: 100%|██████████| 1000/1000 [00:29<00:00, 33.50it/s, vae_loss=0.0286, critic_loss=7.99, actor_loss=-7.34]


2025-12-06 03:13.51 [info     ] BCQ_20251206031126: epoch=4 step=4000 epoch=4 metrics={'time_sample_batch': 0.002263498783111572, 'time_algorithm_update': 0.02678454852104187, 'vae_loss': 0.02862870814744383, 'critic_loss': 7.994783030748367, 'actor_loss': -7.348067032814026, 'time_step': 0.029313442945480345, 'td_error': 4.692403911807428, 'value_scale': 10.126091356683531, 'discounted_advantage': -8.421993452168275, 'initial_state': 8.578512191772461, 'diff_eval': 1176.847250107449} step=4000
2025-12-06 03:13.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_4000.d3


Epoch 5/200: 100%|██████████| 1000/1000 [00:30<00:00, 32.63it/s, vae_loss=0.0262, critic_loss=11.8, actor_loss=-9.13]


2025-12-06 03:14.27 [info     ] BCQ_20251206031126: epoch=5 step=5000 epoch=5 metrics={'time_sample_batch': 0.0023117053508758547, 'time_algorithm_update': 0.027540385961532594, 'vae_loss': 0.02621261615958065, 'critic_loss': 11.842073868751525, 'actor_loss': -9.134907559871674, 'time_step': 0.030125993490219116, 'td_error': 9.480133500581307, 'value_scale': 11.408441418546118, 'discounted_advantage': -9.614457231127071, 'initial_state': 9.928576469421387, 'diff_eval': 912.7561282571907} step=5000
2025-12-06 03:14.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_5000.d3


Epoch 6/200: 100%|██████████| 1000/1000 [00:30<00:00, 33.10it/s, vae_loss=0.0241, critic_loss=15.4, actor_loss=-10.7]


2025-12-06 03:15.03 [info     ] BCQ_20251206031126: epoch=6 step=6000 epoch=6 metrics={'time_sample_batch': 0.0022507936954498293, 'time_algorithm_update': 0.0271816565990448, 'vae_loss': 0.024128738814033567, 'critic_loss': 15.378030324220658, 'actor_loss': -10.721829642295837, 'time_step': 0.029695411205291747, 'td_error': 7.399784383090088, 'value_scale': 13.50569353434386, 'discounted_advantage': -11.670963452811183, 'initial_state': 12.18398666381836, 'diff_eval': 998.3237520395955} step=6000
2025-12-06 03:15.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_6000.d3


Epoch 7/200: 100%|██████████| 1000/1000 [00:30<00:00, 32.84it/s, vae_loss=0.0222, critic_loss=19.9, actor_loss=-12] 


2025-12-06 03:15.39 [info     ] BCQ_20251206031126: epoch=7 step=7000 epoch=7 metrics={'time_sample_batch': 0.002329192876815796, 'time_algorithm_update': 0.02735402774810791, 'vae_loss': 0.022216461239382625, 'critic_loss': 19.941763258218764, 'actor_loss': -11.991680543899536, 'time_step': 0.029940281867980957, 'td_error': 8.736366256295177, 'value_scale': 14.486236752946889, 'discounted_advantage': -11.57636949327549, 'initial_state': 13.167510032653809, 'diff_eval': 935.1442651516787} step=7000
2025-12-06 03:15.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_7000.d3


Epoch 8/200: 100%|██████████| 1000/1000 [00:30<00:00, 33.11it/s, vae_loss=0.0204, critic_loss=23.6, actor_loss=-13] 


2025-12-06 03:16.15 [info     ] BCQ_20251206031126: epoch=8 step=8000 epoch=8 metrics={'time_sample_batch': 0.0022435929775238036, 'time_algorithm_update': 0.027158435821533204, 'vae_loss': 0.020420075305271894, 'critic_loss': 23.62184797358513, 'actor_loss': -13.031467564582824, 'time_step': 0.029675567626953125, 'td_error': 10.169349272158229, 'value_scale': 14.384273474416185, 'discounted_advantage': -10.806434534385744, 'initial_state': 15.557463645935059, 'diff_eval': 817.6793734080292} step=8000
2025-12-06 03:16.15 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_8000.d3


Epoch 9/200: 100%|██████████| 1000/1000 [00:30<00:00, 32.27it/s, vae_loss=0.0194, critic_loss=27, actor_loss=-13.9] 


2025-12-06 03:16.52 [info     ] BCQ_20251206031126: epoch=9 step=9000 epoch=9 metrics={'time_sample_batch': 0.0023344650268554688, 'time_algorithm_update': 0.027839803457260132, 'vae_loss': 0.01939002165570855, 'critic_loss': 27.05024478328228, 'actor_loss': -13.867080026626587, 'time_step': 0.030448249101638795, 'td_error': 13.373858428710315, 'value_scale': 15.117146431822064, 'discounted_advantage': -12.940985612963392, 'initial_state': 14.722648620605469, 'diff_eval': 720.5786728148543} step=9000
2025-12-06 03:16.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_9000.d3


Epoch 10/200: 100%|██████████| 1000/1000 [00:30<00:00, 32.69it/s, vae_loss=0.0182, critic_loss=31.4, actor_loss=-14.6]


2025-12-06 03:17.28 [info     ] BCQ_20251206031126: epoch=10 step=10000 epoch=10 metrics={'time_sample_batch': 0.002297765254974365, 'time_algorithm_update': 0.02746892762184143, 'vae_loss': 0.018256920550949872, 'critic_loss': 31.356301297187805, 'actor_loss': -14.561306529045105, 'time_step': 0.03004231834411621, 'td_error': 15.486472137135758, 'value_scale': 15.30268309550939, 'discounted_advantage': -11.423661571480388, 'initial_state': 11.326711654663086, 'diff_eval': 718.5735216322553} step=10000
2025-12-06 03:17.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_10000.d3


Epoch 11/200: 100%|██████████| 1000/1000 [00:30<00:00, 32.88it/s, vae_loss=0.0179, critic_loss=34.9, actor_loss=-15.4]


2025-12-06 03:18.04 [info     ] BCQ_20251206031126: epoch=11 step=11000 epoch=11 metrics={'time_sample_batch': 0.0022754158973693848, 'time_algorithm_update': 0.027326192140579224, 'vae_loss': 0.017937609664164484, 'critic_loss': 35.09263580799103, 'actor_loss': -15.421441590309144, 'time_step': 0.029881339073181152, 'td_error': 18.39653511279982, 'value_scale': 15.50129089280432, 'discounted_advantage': -11.808942016166554, 'initial_state': 13.263005256652832, 'diff_eval': 698.1836580988906} step=11000
2025-12-06 03:18.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_11000.d3


Epoch 12/200: 100%|██████████| 1000/1000 [00:31<00:00, 31.69it/s, vae_loss=0.0168, critic_loss=37.5, actor_loss=-16.5]


2025-12-06 03:18.41 [info     ] BCQ_20251206031126: epoch=12 step=12000 epoch=12 metrics={'time_sample_batch': 0.0024297916889190673, 'time_algorithm_update': 0.028297785043716432, 'vae_loss': 0.01678870873944834, 'critic_loss': 37.5457036819458, 'actor_loss': -16.53747705745697, 'time_step': 0.031000752449035645, 'td_error': 13.869123833276479, 'value_scale': 17.17871365763016, 'discounted_advantage': -13.627953014423595, 'initial_state': 17.465160369873047, 'diff_eval': 696.3007375199411} step=12000
2025-12-06 03:18.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_12000.d3


Epoch 13/200: 100%|██████████| 1000/1000 [00:30<00:00, 32.57it/s, vae_loss=0.0162, critic_loss=36.2, actor_loss=-17.5]


2025-12-06 03:19.18 [info     ] BCQ_20251206031126: epoch=13 step=13000 epoch=13 metrics={'time_sample_batch': 0.0022740180492401125, 'time_algorithm_update': 0.027632057428359984, 'vae_loss': 0.0161705690077506, 'critic_loss': 36.16957084131241, 'actor_loss': -17.53598557090759, 'time_step': 0.030177350997924804, 'td_error': 12.943371652327853, 'value_scale': 17.12171915944194, 'discounted_advantage': -19.543580396638024, 'initial_state': 16.760608673095703, 'diff_eval': 999.393489940856} step=13000
2025-12-06 03:19.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_13000.d3


Epoch 14/200: 100%|██████████| 1000/1000 [00:31<00:00, 32.24it/s, vae_loss=0.0152, critic_loss=37.9, actor_loss=-18.2]


2025-12-06 03:19.55 [info     ] BCQ_20251206031126: epoch=14 step=14000 epoch=14 metrics={'time_sample_batch': 0.002354275703430176, 'time_algorithm_update': 0.027844650506973267, 'vae_loss': 0.015257205030415208, 'critic_loss': 37.95929800128937, 'actor_loss': -18.181246976852417, 'time_step': 0.030480168104171753, 'td_error': 28.266427503660882, 'value_scale': 17.141427686036707, 'discounted_advantage': -10.333777142738235, 'initial_state': 17.171602249145508, 'diff_eval': 650.8125237395133} step=14000
2025-12-06 03:19.55 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_14000.d3


Epoch 15/200: 100%|██████████| 1000/1000 [00:31<00:00, 31.78it/s, vae_loss=0.0147, critic_loss=40.9, actor_loss=-18.6]


2025-12-06 03:20.32 [info     ] BCQ_20251206031126: epoch=15 step=15000 epoch=15 metrics={'time_sample_batch': 0.0024030659198760987, 'time_algorithm_update': 0.028236886739730835, 'vae_loss': 0.014714565377682447, 'critic_loss': 40.920123266220095, 'actor_loss': -18.61431140613556, 'time_step': 0.030905760049819948, 'td_error': 16.672162001554046, 'value_scale': 17.65161857075758, 'discounted_advantage': -18.544238916506362, 'initial_state': 12.299657821655273, 'diff_eval': 722.3223950389512} step=15000
2025-12-06 03:20.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_15000.d3


Epoch 16/200: 100%|██████████| 1000/1000 [00:32<00:00, 31.18it/s, vae_loss=0.0145, critic_loss=41.5, actor_loss=-18.6]


2025-12-06 03:21.10 [info     ] BCQ_20251206031126: epoch=16 step=16000 epoch=16 metrics={'time_sample_batch': 0.002439899444580078, 'time_algorithm_update': 0.028766839504241942, 'vae_loss': 0.014485408129170537, 'critic_loss': 41.473244351387024, 'actor_loss': -18.595821574211122, 'time_step': 0.031492350578308105, 'td_error': 14.025389941614213, 'value_scale': 19.045260337208713, 'discounted_advantage': -18.272738709014686, 'initial_state': 19.45030975341797, 'diff_eval': 690.7585003024955} step=16000
2025-12-06 03:21.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_16000.d3


Epoch 17/200: 100%|██████████| 1000/1000 [00:31<00:00, 31.54it/s, vae_loss=0.0139, critic_loss=42.2, actor_loss=-18.2]


2025-12-06 03:21.47 [info     ] BCQ_20251206031126: epoch=17 step=17000 epoch=17 metrics={'time_sample_batch': 0.002410116195678711, 'time_algorithm_update': 0.028479942560195922, 'vae_loss': 0.013852132966741919, 'critic_loss': 42.22590653991699, 'actor_loss': -18.225659182548522, 'time_step': 0.03116393518447876, 'td_error': 19.348340384543928, 'value_scale': 17.234843202876764, 'discounted_advantage': -13.741069150063439, 'initial_state': 13.369182586669922, 'diff_eval': 554.5074437888494} step=17000
2025-12-06 03:21.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_17000.d3


Epoch 18/200: 100%|██████████| 1000/1000 [00:31<00:00, 31.93it/s, vae_loss=0.0136, critic_loss=43.2, actor_loss=-17.5]


2025-12-06 03:22.24 [info     ] BCQ_20251206031126: epoch=18 step=18000 epoch=18 metrics={'time_sample_batch': 0.0023832955360412597, 'time_algorithm_update': 0.02808795475959778, 'vae_loss': 0.01361951991636306, 'critic_loss': 43.15251780033112, 'actor_loss': -17.45677376651764, 'time_step': 0.03074707818031311, 'td_error': 14.980612699726324, 'value_scale': 18.468589285763304, 'discounted_advantage': -15.837567241215357, 'initial_state': 18.91208839416504, 'diff_eval': 716.5358430925199} step=18000
2025-12-06 03:22.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_18000.d3


Epoch 19/200: 100%|██████████| 1000/1000 [00:30<00:00, 32.60it/s, vae_loss=0.0132, critic_loss=40.8, actor_loss=-16.6]


2025-12-06 03:23.01 [info     ] BCQ_20251206031126: epoch=19 step=19000 epoch=19 metrics={'time_sample_batch': 0.0023358581066131594, 'time_algorithm_update': 0.027506590843200684, 'vae_loss': 0.013181311378255487, 'critic_loss': 40.746673333644864, 'actor_loss': -16.595330993652343, 'time_step': 0.030120406866073608, 'td_error': 14.678938112017029, 'value_scale': 18.112234890738883, 'discounted_advantage': -14.036373299110684, 'initial_state': 15.457563400268555, 'diff_eval': 601.6017917963076} step=19000
2025-12-06 03:23.01 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_19000.d3


Epoch 20/200: 100%|██████████| 1000/1000 [00:30<00:00, 32.97it/s, vae_loss=0.0127, critic_loss=39.4, actor_loss=-15.8]


2025-12-06 03:23.37 [info     ] BCQ_20251206031126: epoch=20 step=20000 epoch=20 metrics={'time_sample_batch': 0.002282480239868164, 'time_algorithm_update': 0.027252710819244384, 'vae_loss': 0.012723226735834032, 'critic_loss': 39.364537419319156, 'actor_loss': -15.77540951538086, 'time_step': 0.02980548024177551, 'td_error': 19.015488512860518, 'value_scale': 16.84192708119975, 'discounted_advantage': -14.249085998087446, 'initial_state': 12.7122802734375, 'diff_eval': 538.7168577815738} step=20000
2025-12-06 03:23.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_20000.d3


Epoch 21/200: 100%|██████████| 1000/1000 [00:31<00:00, 31.59it/s, vae_loss=0.0124, critic_loss=37.8, actor_loss=-15.1]


2025-12-06 03:24.14 [info     ] BCQ_20251206031126: epoch=21 step=21000 epoch=21 metrics={'time_sample_batch': 0.002371356248855591, 'time_algorithm_update': 0.028414896965026856, 'vae_loss': 0.01236606556084007, 'critic_loss': 37.7793331155777, 'actor_loss': -15.051278606414796, 'time_step': 0.031064847469329835, 'td_error': 13.993412842405755, 'value_scale': 17.84689864911895, 'discounted_advantage': -13.512891672373957, 'initial_state': 18.400672912597656, 'diff_eval': 481.24319015174564} step=21000
2025-12-06 03:24.15 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_21000.d3


Epoch 22/200: 100%|██████████| 1000/1000 [00:30<00:00, 33.24it/s, vae_loss=0.0123, critic_loss=35.9, actor_loss=-14.5]


2025-12-06 03:24.50 [info     ] BCQ_20251206031126: epoch=22 step=22000 epoch=22 metrics={'time_sample_batch': 0.002251871109008789, 'time_algorithm_update': 0.027064933538436888, 'vae_loss': 0.01232324667694047, 'critic_loss': 35.789773858547214, 'actor_loss': -14.48118928527832, 'time_step': 0.029578116178512574, 'td_error': 15.485002383452166, 'value_scale': 17.079433726360715, 'discounted_advantage': -13.876683904938451, 'initial_state': 15.312228202819824, 'diff_eval': 440.59132226461827} step=22000
2025-12-06 03:24.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_22000.d3


Epoch 23/200: 100%|██████████| 1000/1000 [00:30<00:00, 32.75it/s, vae_loss=0.0121, critic_loss=34.1, actor_loss=-14.1]


2025-12-06 03:25.26 [info     ] BCQ_20251206031126: epoch=23 step=23000 epoch=23 metrics={'time_sample_batch': 0.0022851941585540774, 'time_algorithm_update': 0.027438968420028687, 'vae_loss': 0.012086267843376845, 'critic_loss': 34.10780510854721, 'actor_loss': -14.124424205780029, 'time_step': 0.02999557828903198, 'td_error': 14.951589520458127, 'value_scale': 17.149820695716297, 'discounted_advantage': -14.584266627836326, 'initial_state': 14.82983684539795, 'diff_eval': 615.4832685267899} step=23000
2025-12-06 03:25.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_23000.d3


Epoch 24/200: 100%|██████████| 1000/1000 [00:30<00:00, 32.54it/s, vae_loss=0.0115, critic_loss=33.1, actor_loss=-13.9]


2025-12-06 03:26.03 [info     ] BCQ_20251206031126: epoch=24 step=24000 epoch=24 metrics={'time_sample_batch': 0.002373314619064331, 'time_algorithm_update': 0.02755583953857422, 'vae_loss': 0.011468132697977126, 'critic_loss': 33.051667479515075, 'actor_loss': -13.885224884986878, 'time_step': 0.03019672417640686, 'td_error': 13.71521601583375, 'value_scale': 17.71132792716679, 'discounted_advantage': -14.287383922559394, 'initial_state': 19.695907592773438, 'diff_eval': 530.7052237189755} step=24000
2025-12-06 03:26.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_24000.d3


Epoch 25/200: 100%|██████████| 1000/1000 [00:30<00:00, 32.73it/s, vae_loss=0.0115, critic_loss=32.9, actor_loss=-13.8]


2025-12-06 03:26.40 [info     ] BCQ_20251206031126: epoch=25 step=25000 epoch=25 metrics={'time_sample_batch': 0.002260406255722046, 'time_algorithm_update': 0.027423992872238158, 'vae_loss': 0.01144181076157838, 'critic_loss': 32.933892980098726, 'actor_loss': -13.758712718963624, 'time_step': 0.029959091663360596, 'td_error': 16.044959398113654, 'value_scale': 18.008062449673446, 'discounted_advantage': -9.30105541958086, 'initial_state': 18.495351791381836, 'diff_eval': 444.0600124163539} step=25000
2025-12-06 03:26.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_25000.d3


Epoch 26/200: 100%|██████████| 1000/1000 [00:30<00:00, 32.59it/s, vae_loss=0.0111, critic_loss=32.1, actor_loss=-13.8]


2025-12-06 03:27.16 [info     ] BCQ_20251206031126: epoch=26 step=26000 epoch=26 metrics={'time_sample_batch': 0.002302405834197998, 'time_algorithm_update': 0.027565250158309938, 'vae_loss': 0.011146353313233704, 'critic_loss': 32.19197325134277, 'actor_loss': -13.765350261688232, 'time_step': 0.030140285968780517, 'td_error': 16.60274584079007, 'value_scale': 18.14802106398674, 'discounted_advantage': -16.586056925061442, 'initial_state': 16.440134048461914, 'diff_eval': 437.42295050268257} step=26000
2025-12-06 03:27.17 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_26000.d3


Epoch 27/200: 100%|██████████| 1000/1000 [00:30<00:00, 32.26it/s, vae_loss=0.0109, critic_loss=32.7, actor_loss=-13.7]


2025-12-06 03:27.53 [info     ] BCQ_20251206031126: epoch=27 step=27000 epoch=27 metrics={'time_sample_batch': 0.0023729925155639648, 'time_algorithm_update': 0.027814268827438354, 'vae_loss': 0.010858514371328057, 'critic_loss': 32.71093916749954, 'actor_loss': -13.741026487350464, 'time_step': 0.030452754735946656, 'td_error': 17.21123017050144, 'value_scale': 18.175238732257913, 'discounted_advantage': -15.865671521248514, 'initial_state': 16.368452072143555, 'diff_eval': 408.57009320465795} step=27000
2025-12-06 03:27.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_27000.d3


Epoch 28/200: 100%|██████████| 1000/1000 [00:29<00:00, 33.41it/s, vae_loss=0.0108, critic_loss=32.6, actor_loss=-13.6]


2025-12-06 03:28.28 [info     ] BCQ_20251206031126: epoch=28 step=28000 epoch=28 metrics={'time_sample_batch': 0.0022631144523620607, 'time_algorithm_update': 0.026878825187683107, 'vae_loss': 0.010758708121720702, 'critic_loss': 32.57468312215805, 'actor_loss': -13.61624200439453, 'time_step': 0.029401103496551513, 'td_error': 19.33662594978234, 'value_scale': 17.8519872641105, 'discounted_advantage': -15.148148379181697, 'initial_state': 15.3613862991333, 'diff_eval': 434.28521081936753} step=28000
2025-12-06 03:28.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_28000.d3


Epoch 29/200: 100%|██████████| 1000/1000 [00:30<00:00, 33.21it/s, vae_loss=0.0105, critic_loss=33.4, actor_loss=-13.6]


2025-12-06 03:29.04 [info     ] BCQ_20251206031126: epoch=29 step=29000 epoch=29 metrics={'time_sample_batch': 0.0022781262397766114, 'time_algorithm_update': 0.027015652418136596, 'vae_loss': 0.01047697364166379, 'critic_loss': 33.438511417388916, 'actor_loss': -13.587073707580567, 'time_step': 0.02957246136665344, 'td_error': 18.792234566355624, 'value_scale': 16.8453958111691, 'discounted_advantage': -10.776307988080255, 'initial_state': 14.923173904418945, 'diff_eval': 390.52776881463103} step=29000
2025-12-06 03:29.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_29000.d3


Epoch 30/200: 100%|██████████| 1000/1000 [00:31<00:00, 31.60it/s, vae_loss=0.0101, critic_loss=32.6, actor_loss=-13.7]


2025-12-06 03:29.41 [info     ] BCQ_20251206031126: epoch=30 step=30000 epoch=30 metrics={'time_sample_batch': 0.002429874897003174, 'time_algorithm_update': 0.02836456799507141, 'vae_loss': 0.010144108824897557, 'critic_loss': 32.6119825463295, 'actor_loss': -13.671008389472961, 'time_step': 0.031075738430023193, 'td_error': 22.85067156920477, 'value_scale': 17.984822224614845, 'discounted_advantage': -11.356328519304164, 'initial_state': 18.7684268951416, 'diff_eval': 374.3240934799553} step=30000
2025-12-06 03:29.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_30000.d3


Epoch 31/200: 100%|██████████| 1000/1000 [00:31<00:00, 32.08it/s, vae_loss=0.0101, critic_loss=33.3, actor_loss=-13.5]


2025-12-06 03:30.18 [info     ] BCQ_20251206031126: epoch=31 step=31000 epoch=31 metrics={'time_sample_batch': 0.002384591817855835, 'time_algorithm_update': 0.027960919618606566, 'vae_loss': 0.010054367660079151, 'critic_loss': 33.371010194301604, 'actor_loss': -13.519039461135865, 'time_step': 0.030612511157989502, 'td_error': 15.70249299400122, 'value_scale': 18.481469726803567, 'discounted_advantage': -16.168543465867206, 'initial_state': 19.459152221679688, 'diff_eval': 479.2183594322667} step=31000
2025-12-06 03:30.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_31000.d3


Epoch 32/200: 100%|██████████| 1000/1000 [00:30<00:00, 33.10it/s, vae_loss=0.00982, critic_loss=33.6, actor_loss=-13.5]


2025-12-06 03:30.54 [info     ] BCQ_20251206031126: epoch=32 step=32000 epoch=32 metrics={'time_sample_batch': 0.0022940025329589845, 'time_algorithm_update': 0.027119054079055786, 'vae_loss': 0.0098338119010441, 'critic_loss': 33.60028037643433, 'actor_loss': -13.45811948299408, 'time_step': 0.029687329292297362, 'td_error': 19.302804538543366, 'value_scale': 18.61716070049891, 'discounted_advantage': -10.84916012298934, 'initial_state': 17.596813201904297, 'diff_eval': 435.92047681265865} step=32000
2025-12-06 03:30.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_32000.d3


Epoch 33/200: 100%|██████████| 1000/1000 [00:30<00:00, 32.28it/s, vae_loss=0.00963, critic_loss=33.2, actor_loss=-13.4]


2025-12-06 03:31.31 [info     ] BCQ_20251206031126: epoch=33 step=33000 epoch=33 metrics={'time_sample_batch': 0.0023315913677215577, 'time_algorithm_update': 0.027834809064865112, 'vae_loss': 0.009655433982377871, 'critic_loss': 33.22571233654022, 'actor_loss': -13.37660901260376, 'time_step': 0.030436736822128295, 'td_error': 19.76810051187332, 'value_scale': 19.339572827501126, 'discounted_advantage': -14.323680629803063, 'initial_state': 17.105546951293945, 'diff_eval': 461.7875511511689} step=33000
2025-12-06 03:31.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_33000.d3


Epoch 34/200: 100%|██████████| 1000/1000 [00:30<00:00, 32.53it/s, vae_loss=0.00938, critic_loss=32.6, actor_loss=-13.5]


2025-12-06 03:32.07 [info     ] BCQ_20251206031126: epoch=34 step=34000 epoch=34 metrics={'time_sample_batch': 0.0023089442253112795, 'time_algorithm_update': 0.027622194051742554, 'vae_loss': 0.009387953312601895, 'critic_loss': 32.63458214664459, 'actor_loss': -13.477264083862305, 'time_step': 0.030211012840270998, 'td_error': 21.729257545243623, 'value_scale': 18.3675124039569, 'discounted_advantage': -13.095582498469783, 'initial_state': 16.230438232421875, 'diff_eval': 352.0905687474691} step=34000
2025-12-06 03:32.07 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_34000.d3


Epoch 35/200: 100%|██████████| 1000/1000 [00:30<00:00, 33.11it/s, vae_loss=0.00939, critic_loss=32.1, actor_loss=-13.6]


2025-12-06 03:32.43 [info     ] BCQ_20251206031126: epoch=35 step=35000 epoch=35 metrics={'time_sample_batch': 0.002289378881454468, 'time_algorithm_update': 0.02715481162071228, 'vae_loss': 0.009406081272289157, 'critic_loss': 32.02848618173599, 'actor_loss': -13.591700515747071, 'time_step': 0.029709310531616212, 'td_error': 17.671544616943603, 'value_scale': 18.702479475289458, 'discounted_advantage': -15.161826534105879, 'initial_state': 21.093175888061523, 'diff_eval': 388.77763284130435} step=35000
2025-12-06 03:32.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_35000.d3


Epoch 36/200: 100%|██████████| 1000/1000 [00:30<00:00, 32.41it/s, vae_loss=0.00921, critic_loss=32.9, actor_loss=-13.9]


2025-12-06 03:33.19 [info     ] BCQ_20251206031126: epoch=36 step=36000 epoch=36 metrics={'time_sample_batch': 0.0023644342422485354, 'time_algorithm_update': 0.027721185207366942, 'vae_loss': 0.009200130572076887, 'critic_loss': 32.835417037010195, 'actor_loss': -13.85169186782837, 'time_step': 0.03034793758392334, 'td_error': 16.857454725810918, 'value_scale': 19.382083019105856, 'discounted_advantage': -16.4388108926337, 'initial_state': 20.325407028198242, 'diff_eval': 349.61229879283616} step=36000
2025-12-06 03:33.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_36000.d3


Epoch 37/200: 100%|██████████| 1000/1000 [00:31<00:00, 32.14it/s, vae_loss=0.00918, critic_loss=32.8, actor_loss=-14] 


2025-12-06 03:33.56 [info     ] BCQ_20251206031126: epoch=37 step=37000 epoch=37 metrics={'time_sample_batch': 0.0023903241157531736, 'time_algorithm_update': 0.027939290046691895, 'vae_loss': 0.009214354504598305, 'critic_loss': 32.89241336297989, 'actor_loss': -14.006305520057678, 'time_step': 0.03059495496749878, 'td_error': 21.353731337076063, 'value_scale': 18.287214345283875, 'discounted_advantage': -16.637952635705794, 'initial_state': 16.057880401611328, 'diff_eval': 357.3172820553248} step=37000
2025-12-06 03:33.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_37000.d3


Epoch 38/200: 100%|██████████| 1000/1000 [00:30<00:00, 32.94it/s, vae_loss=0.00897, critic_loss=33.1, actor_loss=-14] 


2025-12-06 03:34.32 [info     ] BCQ_20251206031126: epoch=38 step=38000 epoch=38 metrics={'time_sample_batch': 0.002292593002319336, 'time_algorithm_update': 0.02726779532432556, 'vae_loss': 0.008974397363606841, 'critic_loss': 33.095379518032075, 'actor_loss': -14.042159772872925, 'time_step': 0.02982689118385315, 'td_error': 17.53990918180715, 'value_scale': 19.95976244908666, 'discounted_advantage': -19.646863315215587, 'initial_state': 19.905946731567383, 'diff_eval': 409.62829291075803} step=38000
2025-12-06 03:34.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_38000.d3


Epoch 39/200: 100%|██████████| 1000/1000 [00:31<00:00, 32.20it/s, vae_loss=0.00878, critic_loss=33.2, actor_loss=-14.3]


2025-12-06 03:35.09 [info     ] BCQ_20251206031126: epoch=39 step=39000 epoch=39 metrics={'time_sample_batch': 0.002360478401184082, 'time_algorithm_update': 0.02788174343109131, 'vae_loss': 0.008790290179662406, 'critic_loss': 33.290593397140505, 'actor_loss': -14.311014161109924, 'time_step': 0.03051131224632263, 'td_error': 25.286943018908424, 'value_scale': 19.561185146792038, 'discounted_advantage': -8.693337826774751, 'initial_state': 18.787630081176758, 'diff_eval': 307.8741274704451} step=39000
2025-12-06 03:35.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_39000.d3


Epoch 40/200: 100%|██████████| 1000/1000 [00:30<00:00, 32.66it/s, vae_loss=0.00873, critic_loss=32.2, actor_loss=-14.5]


2025-12-06 03:35.45 [info     ] BCQ_20251206031126: epoch=40 step=40000 epoch=40 metrics={'time_sample_batch': 0.002277726173400879, 'time_algorithm_update': 0.02752558970451355, 'vae_loss': 0.00873893176880665, 'critic_loss': 32.2359792046547, 'actor_loss': -14.48932413005829, 'time_step': 0.030064865350723266, 'td_error': 16.981885264214135, 'value_scale': 19.226384291604898, 'discounted_advantage': -16.656619951373976, 'initial_state': 20.083202362060547, 'diff_eval': 384.0139797139605} step=40000
2025-12-06 03:35.45 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_40000.d3


Epoch 41/200: 100%|██████████| 1000/1000 [00:30<00:00, 32.73it/s, vae_loss=0.00859, critic_loss=33.6, actor_loss=-14.8]


2025-12-06 03:36.22 [info     ] BCQ_20251206031126: epoch=41 step=41000 epoch=41 metrics={'time_sample_batch': 0.0022619731426239014, 'time_algorithm_update': 0.02750094151496887, 'vae_loss': 0.008592368283774703, 'critic_loss': 33.693643487930295, 'actor_loss': -14.845523906707763, 'time_step': 0.03004017162322998, 'td_error': 18.086158831317576, 'value_scale': 19.5512393663941, 'discounted_advantage': -17.23229302078317, 'initial_state': 20.078676223754883, 'diff_eval': 361.50905584753434} step=41000
2025-12-06 03:36.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_41000.d3


Epoch 42/200: 100%|██████████| 1000/1000 [00:30<00:00, 32.46it/s, vae_loss=0.00835, critic_loss=33, actor_loss=-15.1] 


2025-12-06 03:36.58 [info     ] BCQ_20251206031126: epoch=42 step=42000 epoch=42 metrics={'time_sample_batch': 0.0023449201583862307, 'time_algorithm_update': 0.0276583251953125, 'vae_loss': 0.008341055273311212, 'critic_loss': 32.98242677307129, 'actor_loss': -15.152715613365173, 'time_step': 0.030273487329483033, 'td_error': 22.903389395437685, 'value_scale': 21.071601528309017, 'discounted_advantage': -19.846066594973266, 'initial_state': 17.29180335998535, 'diff_eval': 361.3329121667367} step=42000
2025-12-06 03:36.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_42000.d3


Epoch 43/200: 100%|██████████| 1000/1000 [00:30<00:00, 32.95it/s, vae_loss=0.00834, critic_loss=34.2, actor_loss=-15.7]


2025-12-06 03:37.34 [info     ] BCQ_20251206031126: epoch=43 step=43000 epoch=43 metrics={'time_sample_batch': 0.002285932779312134, 'time_algorithm_update': 0.027274150848388672, 'vae_loss': 0.008330148911336438, 'critic_loss': 34.17281996679306, 'actor_loss': -15.651452205657959, 'time_step': 0.029823176622390746, 'td_error': 22.709985110971253, 'value_scale': 21.201528249028993, 'discounted_advantage': -17.824930249655704, 'initial_state': 22.743799209594727, 'diff_eval': 350.49759615790606} step=43000
2025-12-06 03:37.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_43000.d3


Epoch 44/200: 100%|██████████| 1000/1000 [00:30<00:00, 32.63it/s, vae_loss=0.00831, critic_loss=35.8, actor_loss=-16.2]


2025-12-06 03:38.10 [info     ] BCQ_20251206031126: epoch=44 step=44000 epoch=44 metrics={'time_sample_batch': 0.002316692113876343, 'time_algorithm_update': 0.027540236949920655, 'vae_loss': 0.008306629518745468, 'critic_loss': 35.71750333070755, 'actor_loss': -16.155672649383543, 'time_step': 0.030117829084396364, 'td_error': 18.938185546946, 'value_scale': 22.618328897861726, 'discounted_advantage': -23.10601552846963, 'initial_state': 23.303117752075195, 'diff_eval': 428.1714733913978} step=44000
2025-12-06 03:38.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_44000.d3


Epoch 45/200: 100%|██████████| 1000/1000 [00:31<00:00, 31.86it/s, vae_loss=0.00833, critic_loss=36.7, actor_loss=-16.7]


2025-12-06 03:38.48 [info     ] BCQ_20251206031126: epoch=45 step=45000 epoch=45 metrics={'time_sample_batch': 0.002343724250793457, 'time_algorithm_update': 0.028191824436187743, 'vae_loss': 0.008328302116831764, 'critic_loss': 36.84335616207123, 'actor_loss': -16.679306941986084, 'time_step': 0.03082440757751465, 'td_error': 22.556949758022846, 'value_scale': 22.727990185721698, 'discounted_advantage': -21.665909089712137, 'initial_state': 18.438705444335938, 'diff_eval': 338.83994307160486} step=45000
2025-12-06 03:38.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_45000.d3


Epoch 46/200: 100%|██████████| 1000/1000 [00:31<00:00, 32.24it/s, vae_loss=0.00808, critic_loss=38.8, actor_loss=-17.1]


2025-12-06 03:39.24 [info     ] BCQ_20251206031126: epoch=46 step=46000 epoch=46 metrics={'time_sample_batch': 0.002381920576095581, 'time_algorithm_update': 0.02782997226715088, 'vae_loss': 0.008062256040982901, 'critic_loss': 38.685733904838564, 'actor_loss': -17.085563400268555, 'time_step': 0.030493431329727174, 'td_error': 21.661546510002886, 'value_scale': 23.082295302704463, 'discounted_advantage': -21.90638730170953, 'initial_state': 26.873411178588867, 'diff_eval': 328.12584134410224} step=46000
2025-12-06 03:39.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_46000.d3


Epoch 47/200: 100%|██████████| 1000/1000 [00:30<00:00, 32.32it/s, vae_loss=0.00784, critic_loss=38.6, actor_loss=-17.5]


2025-12-06 03:40.01 [info     ] BCQ_20251206031126: epoch=47 step=47000 epoch=47 metrics={'time_sample_batch': 0.0023652398586273193, 'time_algorithm_update': 0.027752764701843263, 'vae_loss': 0.007856095421360806, 'critic_loss': 38.62111158466339, 'actor_loss': -17.47188569164276, 'time_step': 0.030393614292144776, 'td_error': 21.8804933217203, 'value_scale': 25.022143148316722, 'discounted_advantage': -24.523284907182592, 'initial_state': 23.295665740966797, 'diff_eval': 397.85747385029003} step=47000
2025-12-06 03:40.01 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_47000.d3


Epoch 48/200: 100%|██████████| 1000/1000 [00:31<00:00, 32.25it/s, vae_loss=0.00792, critic_loss=40.2, actor_loss=-17.7]


2025-12-06 03:40.38 [info     ] BCQ_20251206031126: epoch=48 step=48000 epoch=48 metrics={'time_sample_batch': 0.002381524324417114, 'time_algorithm_update': 0.027803292751312256, 'vae_loss': 0.00792540558008477, 'critic_loss': 40.13619486522675, 'actor_loss': -17.70048208141327, 'time_step': 0.03046584439277649, 'td_error': 25.271560725096208, 'value_scale': 23.488195279798934, 'discounted_advantage': -9.606104911793619, 'initial_state': 24.995033264160156, 'diff_eval': 364.8396866329476} step=48000
2025-12-06 03:40.38 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_48000.d3


Epoch 49/200: 100%|██████████| 1000/1000 [00:31<00:00, 32.13it/s, vae_loss=0.00782, critic_loss=41.2, actor_loss=-18.1]


2025-12-06 03:41.14 [info     ] BCQ_20251206031126: epoch=49 step=49000 epoch=49 metrics={'time_sample_batch': 0.002359463691711426, 'time_algorithm_update': 0.027964331388473512, 'vae_loss': 0.007828735470306129, 'critic_loss': 41.26882882785797, 'actor_loss': -18.09624454021454, 'time_step': 0.030597527265548707, 'td_error': 27.005444945168925, 'value_scale': 24.873344572468632, 'discounted_advantage': -26.605352047711417, 'initial_state': 29.083662033081055, 'diff_eval': 446.28656579111407} step=49000
2025-12-06 03:41.15 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_49000.d3


Epoch 50/200: 100%|██████████| 1000/1000 [00:30<00:00, 32.46it/s, vae_loss=0.00776, critic_loss=42.7, actor_loss=-18.4]


2025-12-06 03:41.51 [info     ] BCQ_20251206031126: epoch=50 step=50000 epoch=50 metrics={'time_sample_batch': 0.002337928533554077, 'time_algorithm_update': 0.027660517692565917, 'vae_loss': 0.007761648421408609, 'critic_loss': 42.68356904315949, 'actor_loss': -18.421919865608217, 'time_step': 0.030263396978378296, 'td_error': 23.812239307652543, 'value_scale': 25.599571777665354, 'discounted_advantage': -21.092471772008174, 'initial_state': 22.36956024169922, 'diff_eval': 399.2038407360301} step=50000
2025-12-06 03:41.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_50000.d3


Epoch 51/200: 100%|██████████| 1000/1000 [00:31<00:00, 31.82it/s, vae_loss=0.00765, critic_loss=42, actor_loss=-18.8] 


2025-12-06 03:42.28 [info     ] BCQ_20251206031126: epoch=51 step=51000 epoch=51 metrics={'time_sample_batch': 0.0023936364650726316, 'time_algorithm_update': 0.028223991870880125, 'vae_loss': 0.007652491679415107, 'critic_loss': 41.96124465847016, 'actor_loss': -18.797679828643798, 'time_step': 0.030890979051589967, 'td_error': 23.19601903884569, 'value_scale': 24.984474660471168, 'discounted_advantage': -18.207165466787973, 'initial_state': 24.47213363647461, 'diff_eval': 282.6065389162372} step=51000
2025-12-06 03:42.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_51000.d3


Epoch 52/200: 100%|██████████| 1000/1000 [00:30<00:00, 32.46it/s, vae_loss=0.00749, critic_loss=42.9, actor_loss=-19] 


2025-12-06 03:43.05 [info     ] BCQ_20251206031126: epoch=52 step=52000 epoch=52 metrics={'time_sample_batch': 0.0023910348415374758, 'time_algorithm_update': 0.027614856243133545, 'vae_loss': 0.007496340366778895, 'critic_loss': 42.88010340118408, 'actor_loss': -18.973768733024595, 'time_step': 0.03027282404899597, 'td_error': 24.40060913802469, 'value_scale': 27.88187773502295, 'discounted_advantage': -25.00910763528868, 'initial_state': 29.95842742919922, 'diff_eval': 349.3049538791869} step=52000
2025-12-06 03:43.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_52000.d3


Epoch 53/200: 100%|██████████| 1000/1000 [00:30<00:00, 32.99it/s, vae_loss=0.00751, critic_loss=45.2, actor_loss=-19.3]


2025-12-06 03:43.41 [info     ] BCQ_20251206031126: epoch=53 step=53000 epoch=53 metrics={'time_sample_batch': 0.00227382755279541, 'time_algorithm_update': 0.0272135694026947, 'vae_loss': 0.007496617802418768, 'critic_loss': 45.10316245651245, 'actor_loss': -19.332271276474, 'time_step': 0.029759135484695436, 'td_error': 28.60976294791253, 'value_scale': 24.7947002051356, 'discounted_advantage': -20.005411177687193, 'initial_state': 28.37407684326172, 'diff_eval': 265.45164838132723} step=53000
2025-12-06 03:43.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_53000.d3


Epoch 54/200: 100%|██████████| 1000/1000 [00:30<00:00, 32.66it/s, vae_loss=0.00739, critic_loss=43.7, actor_loss=-19.4]


2025-12-06 03:44.17 [info     ] BCQ_20251206031126: epoch=54 step=54000 epoch=54 metrics={'time_sample_batch': 0.0023259189128875734, 'time_algorithm_update': 0.027508707761764527, 'vae_loss': 0.007391304860357195, 'critic_loss': 43.67597214221954, 'actor_loss': -19.417004239082335, 'time_step': 0.03010381293296814, 'td_error': 32.523782691419306, 'value_scale': 26.932244958004727, 'discounted_advantage': -21.53868279730185, 'initial_state': 29.733842849731445, 'diff_eval': 413.53408270197986} step=54000
2025-12-06 03:44.17 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_54000.d3


Epoch 55/200: 100%|██████████| 1000/1000 [00:30<00:00, 33.08it/s, vae_loss=0.00736, critic_loss=45.6, actor_loss=-19.9]


2025-12-06 03:44.52 [info     ] BCQ_20251206031126: epoch=55 step=55000 epoch=55 metrics={'time_sample_batch': 0.0022789306640625, 'time_algorithm_update': 0.027154237270355226, 'vae_loss': 0.0073585288550239054, 'critic_loss': 45.66776712799072, 'actor_loss': -19.857511198997496, 'time_step': 0.0297029550075531, 'td_error': 29.439903250482406, 'value_scale': 25.08569545749097, 'discounted_advantage': -23.197116095276577, 'initial_state': 21.44790267944336, 'diff_eval': 344.87396230360054} step=55000
2025-12-06 03:44.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_55000.d3


Epoch 56/200: 100%|██████████| 1000/1000 [00:30<00:00, 33.16it/s, vae_loss=0.00705, critic_loss=43.8, actor_loss=-20] 


2025-12-06 03:45.28 [info     ] BCQ_20251206031126: epoch=56 step=56000 epoch=56 metrics={'time_sample_batch': 0.0022787044048309326, 'time_algorithm_update': 0.027067972421646116, 'vae_loss': 0.0070427399091422555, 'critic_loss': 43.771064960479734, 'actor_loss': -20.020023646354677, 'time_step': 0.02961826491355896, 'td_error': 25.646101211897207, 'value_scale': 26.203226864200527, 'discounted_advantage': -24.841077605494302, 'initial_state': 25.969106674194336, 'diff_eval': 344.10344855057406} step=56000
2025-12-06 03:45.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_56000.d3


Epoch 57/200: 100%|██████████| 1000/1000 [00:30<00:00, 33.33it/s, vae_loss=0.0072, critic_loss=44.2, actor_loss=-20.2]


2025-12-06 03:46.04 [info     ] BCQ_20251206031126: epoch=57 step=57000 epoch=57 metrics={'time_sample_batch': 0.0022734251022338866, 'time_algorithm_update': 0.02693807768821716, 'vae_loss': 0.007202567658387124, 'critic_loss': 44.1878939371109, 'actor_loss': -20.15846677684784, 'time_step': 0.02946862292289734, 'td_error': 23.327989297331346, 'value_scale': 27.04468862839162, 'discounted_advantage': -27.137338572110576, 'initial_state': 27.558164596557617, 'diff_eval': 367.5067285056793} step=57000
2025-12-06 03:46.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_57000.d3


Epoch 58/200: 100%|██████████| 1000/1000 [00:31<00:00, 31.89it/s, vae_loss=0.00718, critic_loss=45.8, actor_loss=-20.6]


2025-12-06 03:46.41 [info     ] BCQ_20251206031126: epoch=58 step=58000 epoch=58 metrics={'time_sample_batch': 0.0023665921688079835, 'time_algorithm_update': 0.02815894103050232, 'vae_loss': 0.0071868443903513254, 'critic_loss': 45.76541475486756, 'actor_loss': -20.581804966926576, 'time_step': 0.03079854965209961, 'td_error': 26.387669725424104, 'value_scale': 25.328965557590813, 'discounted_advantage': -14.179718403371758, 'initial_state': 26.878450393676758, 'diff_eval': 293.7477367600015} step=58000
2025-12-06 03:46.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_58000.d3


Epoch 59/200: 100%|██████████| 1000/1000 [00:30<00:00, 32.42it/s, vae_loss=0.00697, critic_loss=46.2, actor_loss=-21] 


2025-12-06 03:47.17 [info     ] BCQ_20251206031126: epoch=59 step=59000 epoch=59 metrics={'time_sample_batch': 0.002496659517288208, 'time_algorithm_update': 0.02757773947715759, 'vae_loss': 0.0069798433051910256, 'critic_loss': 46.25183063983917, 'actor_loss': -20.988040963172914, 'time_step': 0.030332703351974487, 'td_error': 26.139411839641003, 'value_scale': 24.501290629859092, 'discounted_advantage': -14.335974583511335, 'initial_state': 27.018394470214844, 'diff_eval': 247.21051920825576} step=59000
2025-12-06 03:47.17 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_59000.d3


Epoch 60/200: 100%|██████████| 1000/1000 [00:31<00:00, 31.79it/s, vae_loss=0.00697, critic_loss=46.9, actor_loss=-21.4]


2025-12-06 03:47.54 [info     ] BCQ_20251206031126: epoch=60 step=60000 epoch=60 metrics={'time_sample_batch': 0.002357675313949585, 'time_algorithm_update': 0.028275582790374756, 'vae_loss': 0.0069561810279265045, 'critic_loss': 46.853795006752016, 'actor_loss': -21.35048241710663, 'time_step': 0.030910816192626953, 'td_error': 21.780760533645896, 'value_scale': 27.128240451285397, 'discounted_advantage': -24.872648598802122, 'initial_state': 28.153776168823242, 'diff_eval': 297.4631157411487} step=60000
2025-12-06 03:47.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_60000.d3


Epoch 61/200: 100%|██████████| 1000/1000 [00:31<00:00, 31.61it/s, vae_loss=0.00691, critic_loss=47.4, actor_loss=-22.2]


2025-12-06 03:48.31 [info     ] BCQ_20251206031126: epoch=61 step=61000 epoch=61 metrics={'time_sample_batch': 0.0023784549236297607, 'time_algorithm_update': 0.028400527715682983, 'vae_loss': 0.006911654073279351, 'critic_loss': 47.408377495765684, 'actor_loss': -22.191757289886475, 'time_step': 0.03105903673171997, 'td_error': 27.7898923803719, 'value_scale': 26.941011371820142, 'discounted_advantage': -16.496744165945316, 'initial_state': 25.717893600463867, 'diff_eval': 396.28358431684677} step=61000
2025-12-06 03:48.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_61000.d3


Epoch 62/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.75it/s, vae_loss=0.00704, critic_loss=47.5, actor_loss=-23.1]


2025-12-06 03:49.06 [info     ] BCQ_20251206031126: epoch=62 step=62000 epoch=62 metrics={'time_sample_batch': 0.002130016803741455, 'time_algorithm_update': 0.02586751055717468, 'vae_loss': 0.007038908374728635, 'critic_loss': 47.44062388515472, 'actor_loss': -23.121862608909606, 'time_step': 0.028258868694305418, 'td_error': 22.387092792926676, 'value_scale': 27.372520634465776, 'discounted_advantage': -25.0973943632122, 'initial_state': 27.868894577026367, 'diff_eval': 330.88961800420174} step=62000
2025-12-06 03:49.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_62000.d3


Epoch 63/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.47it/s, vae_loss=0.00668, critic_loss=47.4, actor_loss=-24.4]


2025-12-06 03:49.40 [info     ] BCQ_20251206031126: epoch=63 step=63000 epoch=63 metrics={'time_sample_batch': 0.0020797648429870604, 'time_algorithm_update': 0.026167558193206788, 'vae_loss': 0.006695779902860522, 'critic_loss': 47.382568731307984, 'actor_loss': -24.427941398620604, 'time_step': 0.028502193450927735, 'td_error': 22.828556235599482, 'value_scale': 26.368405581700983, 'discounted_advantage': -23.32243668295224, 'initial_state': 28.102378845214844, 'diff_eval': 308.0270878982972} step=63000
2025-12-06 03:49.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_63000.d3


Epoch 64/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.19it/s, vae_loss=0.00663, critic_loss=49.3, actor_loss=-25.7]


2025-12-06 03:50.15 [info     ] BCQ_20251206031126: epoch=64 step=64000 epoch=64 metrics={'time_sample_batch': 0.002464897632598877, 'time_algorithm_update': 0.026002633571624755, 'vae_loss': 0.006637800982221961, 'critic_loss': 49.32406210994721, 'actor_loss': -25.717822593688965, 'time_step': 0.02873266100883484, 'td_error': 22.98016989773062, 'value_scale': 28.79565629017698, 'discounted_advantage': -23.03277330681869, 'initial_state': 29.216392517089844, 'diff_eval': 284.08666254851266} step=64000
2025-12-06 03:50.15 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_64000.d3


Epoch 65/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.61it/s, vae_loss=0.0066, critic_loss=52.5, actor_loss=-27.3]


2025-12-06 03:50.49 [info     ] BCQ_20251206031126: epoch=65 step=65000 epoch=65 metrics={'time_sample_batch': 0.0021406567096710206, 'time_algorithm_update': 0.025951715469360353, 'vae_loss': 0.0066029116972349585, 'critic_loss': 52.53324949645996, 'actor_loss': -27.273508827209472, 'time_step': 0.028354479074478148, 'td_error': 27.109320655427673, 'value_scale': 31.46215905179172, 'discounted_advantage': -30.67722678560323, 'initial_state': 33.22331237792969, 'diff_eval': 386.9477740235363} step=65000
2025-12-06 03:50.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_65000.d3


Epoch 66/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.61it/s, vae_loss=0.00674, critic_loss=55.5, actor_loss=-28.9]


2025-12-06 03:51.24 [info     ] BCQ_20251206031126: epoch=66 step=66000 epoch=66 metrics={'time_sample_batch': 0.002148505926132202, 'time_algorithm_update': 0.02595464324951172, 'vae_loss': 0.006742175827967003, 'critic_loss': 55.549418857574466, 'actor_loss': -28.907285081863403, 'time_step': 0.028365360736846923, 'td_error': 28.550723252948444, 'value_scale': 32.1132508963971, 'discounted_advantage': -36.26937418444955, 'initial_state': 34.410888671875, 'diff_eval': 297.71434951543387} step=66000
2025-12-06 03:51.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_66000.d3


Epoch 67/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.70it/s, vae_loss=0.00651, critic_loss=58.3, actor_loss=-30.3]


2025-12-06 03:51.58 [info     ] BCQ_20251206031126: epoch=67 step=67000 epoch=67 metrics={'time_sample_batch': 0.002091520309448242, 'time_algorithm_update': 0.025917933940887452, 'vae_loss': 0.006524922507582232, 'critic_loss': 58.43301283454895, 'actor_loss': -30.274272367477415, 'time_step': 0.028277669191360474, 'td_error': 27.20624816040197, 'value_scale': 34.89622523763029, 'discounted_advantage': -36.629721879623354, 'initial_state': 36.04499435424805, 'diff_eval': 334.92571927521277} step=67000
2025-12-06 03:51.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_67000.d3


Epoch 68/200: 100%|██████████| 1000/1000 [00:28<00:00, 35.16it/s, vae_loss=0.00657, critic_loss=63.2, actor_loss=-32.2]


2025-12-06 03:52.32 [info     ] BCQ_20251206031126: epoch=68 step=68000 epoch=68 metrics={'time_sample_batch': 0.0020757715702056883, 'time_algorithm_update': 0.025570499658584593, 'vae_loss': 0.0065761092812754215, 'critic_loss': 63.395854429245, 'actor_loss': -32.24646283149719, 'time_step': 0.0279132878780365, 'td_error': 30.586206424072397, 'value_scale': 36.51626572695507, 'discounted_advantage': -32.51703633821355, 'initial_state': 35.876190185546875, 'diff_eval': 271.06881320196186} step=68000
2025-12-06 03:52.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_68000.d3


Epoch 69/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.74it/s, vae_loss=0.00641, critic_loss=69.9, actor_loss=-34.1]


2025-12-06 03:53.06 [info     ] BCQ_20251206031126: epoch=69 step=69000 epoch=69 metrics={'time_sample_batch': 0.002122408390045166, 'time_algorithm_update': 0.025892758369445802, 'vae_loss': 0.006409975841641426, 'critic_loss': 69.87681982040405, 'actor_loss': -34.12403106117249, 'time_step': 0.028274651288986204, 'td_error': 46.95260449680251, 'value_scale': 36.84375910718253, 'discounted_advantage': -20.728726060849592, 'initial_state': 40.63380813598633, 'diff_eval': 280.8155164587347} step=69000
2025-12-06 03:53.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_69000.d3


Epoch 70/200: 100%|██████████| 1000/1000 [00:29<00:00, 33.81it/s, vae_loss=0.00644, critic_loss=76.4, actor_loss=-36] 


2025-12-06 03:53.41 [info     ] BCQ_20251206031126: epoch=70 step=70000 epoch=70 metrics={'time_sample_batch': 0.0022647063732147217, 'time_algorithm_update': 0.026532062768936156, 'vae_loss': 0.006446146192261949, 'critic_loss': 76.40782620811463, 'actor_loss': -35.99191261482239, 'time_step': 0.029053237676620484, 'td_error': 30.387502181364482, 'value_scale': 41.68305876231553, 'discounted_advantage': -44.27926715495479, 'initial_state': 41.220638275146484, 'diff_eval': 336.27638982354637} step=70000
2025-12-06 03:53.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_70000.d3


Epoch 71/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.06it/s, vae_loss=0.00642, critic_loss=80.4, actor_loss=-38.1]


2025-12-06 03:54.16 [info     ] BCQ_20251206031126: epoch=71 step=71000 epoch=71 metrics={'time_sample_batch': 0.00222489070892334, 'time_algorithm_update': 0.02632041835784912, 'vae_loss': 0.006423467880813405, 'critic_loss': 80.32963024711609, 'actor_loss': -38.07467357063293, 'time_step': 0.028810393571853637, 'td_error': 30.51761667844739, 'value_scale': 41.7154024542752, 'discounted_advantage': -43.50521735992835, 'initial_state': 42.33749008178711, 'diff_eval': 324.0965265688835} step=71000
2025-12-06 03:54.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_71000.d3


Epoch 72/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.04it/s, vae_loss=0.00641, critic_loss=85.9, actor_loss=-40] 


2025-12-06 03:54.51 [info     ] BCQ_20251206031126: epoch=72 step=72000 epoch=72 metrics={'time_sample_batch': 0.0022031762599945067, 'time_algorithm_update': 0.026394927501678468, 'vae_loss': 0.006410981339635327, 'critic_loss': 85.92304646110534, 'actor_loss': -39.989494157791135, 'time_step': 0.028864370346069337, 'td_error': 38.37799297218191, 'value_scale': 43.745048973319015, 'discounted_advantage': -39.6501435995442, 'initial_state': 47.664852142333984, 'diff_eval': 275.49682252744617} step=72000
2025-12-06 03:54.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_72000.d3


Epoch 73/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.39it/s, vae_loss=0.00625, critic_loss=94.7, actor_loss=-42] 


2025-12-06 03:55.26 [info     ] BCQ_20251206031126: epoch=73 step=73000 epoch=73 metrics={'time_sample_batch': 0.0021677253246307373, 'time_algorithm_update': 0.02615391778945923, 'vae_loss': 0.006241155029973015, 'critic_loss': 94.6697605381012, 'actor_loss': -42.00945345306396, 'time_step': 0.02856840705871582, 'td_error': 41.62011482056839, 'value_scale': 46.844791215940475, 'discounted_advantage': -38.74811141012112, 'initial_state': 49.786163330078125, 'diff_eval': 251.57689521697532} step=73000
2025-12-06 03:55.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_73000.d3


Epoch 74/200: 100%|██████████| 1000/1000 [00:31<00:00, 31.85it/s, vae_loss=0.00622, critic_loss=101, actor_loss=-44.2]


2025-12-06 03:56.03 [info     ] BCQ_20251206031126: epoch=74 step=74000 epoch=74 metrics={'time_sample_batch': 0.002298561096191406, 'time_algorithm_update': 0.028287325620651244, 'vae_loss': 0.006241505918791517, 'critic_loss': 101.36653770828246, 'actor_loss': -44.23696150970459, 'time_step': 0.030867219686508178, 'td_error': 57.794937789266655, 'value_scale': 49.587603362381756, 'discounted_advantage': -52.930244676521205, 'initial_state': 47.26249694824219, 'diff_eval': 340.57715960022506} step=74000
2025-12-06 03:56.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_74000.d3


Epoch 75/200: 100%|██████████| 1000/1000 [00:30<00:00, 32.93it/s, vae_loss=0.00635, critic_loss=107, actor_loss=-46.1]


2025-12-06 03:56.39 [info     ] BCQ_20251206031126: epoch=75 step=75000 epoch=75 metrics={'time_sample_batch': 0.0022275824546813964, 'time_algorithm_update': 0.027338695049285888, 'vae_loss': 0.006347431089961901, 'critic_loss': 107.20345331192017, 'actor_loss': -46.14070306777954, 'time_step': 0.029837519884109497, 'td_error': 43.18292035512716, 'value_scale': 50.96025814147227, 'discounted_advantage': -49.421555389484766, 'initial_state': 52.0318489074707, 'diff_eval': 301.7229194223833} step=75000
2025-12-06 03:56.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_75000.d3


Epoch 76/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.36it/s, vae_loss=0.00618, critic_loss=113, actor_loss=-48] 


2025-12-06 03:57.13 [info     ] BCQ_20251206031126: epoch=76 step=76000 epoch=76 metrics={'time_sample_batch': 0.002201350450515747, 'time_algorithm_update': 0.02611251640319824, 'vae_loss': 0.006178112210473046, 'critic_loss': 113.20682695007324, 'actor_loss': -47.9854665184021, 'time_step': 0.028579402446746827, 'td_error': 68.54414119371174, 'value_scale': 51.72602877093281, 'discounted_advantage': -24.90234017188969, 'initial_state': 59.35307312011719, 'diff_eval': 235.96703400080008} step=76000
2025-12-06 03:57.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_76000.d3


Epoch 77/200: 100%|██████████| 1000/1000 [00:30<00:00, 33.06it/s, vae_loss=0.00605, critic_loss=122, actor_loss=-49.7]


2025-12-06 03:57.49 [info     ] BCQ_20251206031126: epoch=77 step=77000 epoch=77 metrics={'time_sample_batch': 0.002301323890686035, 'time_algorithm_update': 0.027082709074020387, 'vae_loss': 0.006043109122896567, 'critic_loss': 122.11232322692871, 'actor_loss': -49.71748416137695, 'time_step': 0.029669036149978636, 'td_error': 55.93876397531662, 'value_scale': 54.438813836444155, 'discounted_advantage': -38.451327504036044, 'initial_state': 55.42721176147461, 'diff_eval': 240.81043788261348} step=77000
2025-12-06 03:57.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_77000.d3


Epoch 78/200: 100%|██████████| 1000/1000 [00:30<00:00, 32.51it/s, vae_loss=0.00613, critic_loss=125, actor_loss=-51.7]


2025-12-06 03:58.25 [info     ] BCQ_20251206031126: epoch=78 step=78000 epoch=78 metrics={'time_sample_batch': 0.0031165854930877684, 'time_algorithm_update': 0.026819339275360107, 'vae_loss': 0.0061174006753135475, 'critic_loss': 124.87694693756103, 'actor_loss': -51.720798526763915, 'time_step': 0.03021440601348877, 'td_error': 51.46963128094018, 'value_scale': 56.794795185889605, 'discounted_advantage': -59.20685787746774, 'initial_state': 58.37858581542969, 'diff_eval': 293.094460736391} step=78000
2025-12-06 03:58.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_78000.d3


Epoch 79/200: 100%|██████████| 1000/1000 [00:28<00:00, 35.17it/s, vae_loss=0.00603, critic_loss=137, actor_loss=-53.9]


2025-12-06 03:58.59 [info     ] BCQ_20251206031126: epoch=79 step=79000 epoch=79 metrics={'time_sample_batch': 0.002064931392669678, 'time_algorithm_update': 0.025568294763565064, 'vae_loss': 0.006019100105622783, 'critic_loss': 137.38363652420043, 'actor_loss': -53.93426404953003, 'time_step': 0.027886531829833985, 'td_error': 57.995925162319736, 'value_scale': 57.328135437782535, 'discounted_advantage': -57.41501374409428, 'initial_state': 55.89395523071289, 'diff_eval': 302.4255970643223} step=79000
2025-12-06 03:58.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_79000.d3


Epoch 80/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.64it/s, vae_loss=0.00607, critic_loss=143, actor_loss=-56.3]


2025-12-06 03:59.33 [info     ] BCQ_20251206031126: epoch=80 step=80000 epoch=80 metrics={'time_sample_batch': 0.0021487436294555665, 'time_algorithm_update': 0.025917203187942505, 'vae_loss': 0.0060680767593439666, 'critic_loss': 142.37077144241334, 'actor_loss': -56.32884495925903, 'time_step': 0.028328023195266724, 'td_error': 58.6827072409763, 'value_scale': 61.26099823495843, 'discounted_advantage': -48.615315443116614, 'initial_state': 62.64524459838867, 'diff_eval': 259.407629847396} step=80000
2025-12-06 03:59.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_80000.d3


Epoch 81/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.47it/s, vae_loss=0.00597, critic_loss=150, actor_loss=-58.8]


2025-12-06 04:00.08 [info     ] BCQ_20251206031126: epoch=81 step=81000 epoch=81 metrics={'time_sample_batch': 0.0021138026714324952, 'time_algorithm_update': 0.02609936738014221, 'vae_loss': 0.005969405786599964, 'critic_loss': 149.70862060546875, 'actor_loss': -58.85204146575928, 'time_step': 0.028488277196884154, 'td_error': 80.08409650025908, 'value_scale': 63.14269004178867, 'discounted_advantage': -47.84598427291133, 'initial_state': 69.1526870727539, 'diff_eval': 274.19848333608513} step=81000
2025-12-06 04:00.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_81000.d3


Epoch 82/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.83it/s, vae_loss=0.00583, critic_loss=162, actor_loss=-61.5]


2025-12-06 04:00.42 [info     ] BCQ_20251206031126: epoch=82 step=82000 epoch=82 metrics={'time_sample_batch': 0.002126516342163086, 'time_algorithm_update': 0.025795475244522095, 'vae_loss': 0.005825962595874444, 'critic_loss': 161.54364182281495, 'actor_loss': -61.554009132385254, 'time_step': 0.028185661792755126, 'td_error': 71.30133014823902, 'value_scale': 67.79583943542171, 'discounted_advantage': -62.99087724312151, 'initial_state': 71.8080062866211, 'diff_eval': 313.3387680160283} step=82000
2025-12-06 04:00.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_82000.d3


Epoch 83/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.40it/s, vae_loss=0.00571, critic_loss=174, actor_loss=-64.4]


2025-12-06 04:01.16 [info     ] BCQ_20251206031126: epoch=83 step=83000 epoch=83 metrics={'time_sample_batch': 0.0021790308952331543, 'time_algorithm_update': 0.02606916046142578, 'vae_loss': 0.005709877868182957, 'critic_loss': 174.02846631240845, 'actor_loss': -64.38407362747192, 'time_step': 0.02852427864074707, 'td_error': 62.526514867930764, 'value_scale': 71.21897952328534, 'discounted_advantage': -79.82441846959169, 'initial_state': 73.99201202392578, 'diff_eval': 340.28194968066975} step=83000
2025-12-06 04:01.17 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_83000.d3


Epoch 84/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.89it/s, vae_loss=0.00593, critic_loss=190, actor_loss=-67.1]


2025-12-06 04:01.50 [info     ] BCQ_20251206031126: epoch=84 step=84000 epoch=84 metrics={'time_sample_batch': 0.0021017100811004637, 'time_algorithm_update': 0.02578509044647217, 'vae_loss': 0.005926018820609898, 'critic_loss': 190.1612895889282, 'actor_loss': -67.18878632736207, 'time_step': 0.028139922857284547, 'td_error': 94.54625478393423, 'value_scale': 70.16397549960159, 'discounted_advantage': -54.65247741608968, 'initial_state': 67.27033996582031, 'diff_eval': 248.45807228462309} step=84000
2025-12-06 04:01.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_84000.d3


Epoch 85/200: 100%|██████████| 1000/1000 [00:28<00:00, 35.18it/s, vae_loss=0.0058, critic_loss=198, actor_loss=-70.3]


2025-12-06 04:02.24 [info     ] BCQ_20251206031126: epoch=85 step=85000 epoch=85 metrics={'time_sample_batch': 0.0020986804962158205, 'time_algorithm_update': 0.025565171957015992, 'vae_loss': 0.005806962999748066, 'critic_loss': 197.74045377349853, 'actor_loss': -70.27714618301391, 'time_step': 0.027925001621246337, 'td_error': 81.17763736910861, 'value_scale': 74.09918502267928, 'discounted_advantage': -58.99149356328391, 'initial_state': 83.56902313232422, 'diff_eval': 265.28525595141423} step=85000
2025-12-06 04:02.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_85000.d3


Epoch 86/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.83it/s, vae_loss=0.00559, critic_loss=217, actor_loss=-73.7]


2025-12-06 04:02.59 [info     ] BCQ_20251206031126: epoch=86 step=86000 epoch=86 metrics={'time_sample_batch': 0.002105381965637207, 'time_algorithm_update': 0.02581754493713379, 'vae_loss': 0.005578815057175234, 'critic_loss': 217.00614072418213, 'actor_loss': -73.68483700942993, 'time_step': 0.028178644180297852, 'td_error': 85.81107265658409, 'value_scale': 80.15279052588286, 'discounted_advantage': -102.1935369292777, 'initial_state': 79.86935424804688, 'diff_eval': 331.0566078426177} step=86000
2025-12-06 04:02.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_86000.d3


Epoch 87/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.55it/s, vae_loss=0.00574, critic_loss=222, actor_loss=-77.5]


2025-12-06 04:03.33 [info     ] BCQ_20251206031126: epoch=87 step=87000 epoch=87 metrics={'time_sample_batch': 0.0021039726734161375, 'time_algorithm_update': 0.02601178240776062, 'vae_loss': 0.00576166391489096, 'critic_loss': 222.4492730102539, 'actor_loss': -77.53261683273315, 'time_step': 0.028387462139129637, 'td_error': 89.5559489574079, 'value_scale': 83.91568793483822, 'discounted_advantage': -100.54624520836786, 'initial_state': 82.5904312133789, 'diff_eval': 362.87650166850824} step=87000
2025-12-06 04:03.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_87000.d3


Epoch 88/200: 100%|██████████| 1000/1000 [00:28<00:00, 35.01it/s, vae_loss=0.00569, critic_loss=243, actor_loss=-80.8]


2025-12-06 04:04.07 [info     ] BCQ_20251206031126: epoch=88 step=88000 epoch=88 metrics={'time_sample_batch': 0.0020307366847991943, 'time_algorithm_update': 0.025701380014419555, 'vae_loss': 0.005686723588267341, 'critic_loss': 243.3301065864563, 'actor_loss': -80.80897152709962, 'time_step': 0.027988959550857544, 'td_error': 106.82802992753514, 'value_scale': 87.06032829117795, 'discounted_advantage': -85.57587125042102, 'initial_state': 96.451171875, 'diff_eval': 303.46986102617154} step=88000
2025-12-06 04:04.07 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_88000.d3


Epoch 89/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.36it/s, vae_loss=0.00555, critic_loss=258, actor_loss=-84.9]


2025-12-06 04:04.42 [info     ] BCQ_20251206031126: epoch=89 step=89000 epoch=89 metrics={'time_sample_batch': 0.0021979708671569826, 'time_algorithm_update': 0.026097513198852538, 'vae_loss': 0.005547342660836875, 'critic_loss': 257.98676371765134, 'actor_loss': -84.90603186035156, 'time_step': 0.028553844690322877, 'td_error': 101.57870861060759, 'value_scale': 88.76349794762233, 'discounted_advantage': -94.93021836507602, 'initial_state': 95.57687377929688, 'diff_eval': 300.6694403367539} step=89000
2025-12-06 04:04.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_89000.d3


Epoch 90/200: 100%|██████████| 1000/1000 [00:29<00:00, 33.98it/s, vae_loss=0.00552, critic_loss=268, actor_loss=-89] 


2025-12-06 04:05.16 [info     ] BCQ_20251206031126: epoch=90 step=90000 epoch=90 metrics={'time_sample_batch': 0.002187650203704834, 'time_algorithm_update': 0.0264237642288208, 'vae_loss': 0.0055272720190696414, 'critic_loss': 268.7774623184204, 'actor_loss': -89.07155062866211, 'time_step': 0.028893994092941286, 'td_error': 182.5743775518599, 'value_scale': 94.45521151451183, 'discounted_advantage': -83.48035963952238, 'initial_state': 99.9510726928711, 'diff_eval': 375.46509544867763} step=90000
2025-12-06 04:05.17 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_90000.d3


Epoch 91/200: 100%|██████████| 1000/1000 [00:29<00:00, 33.98it/s, vae_loss=0.00559, critic_loss=303, actor_loss=-93.9]


2025-12-06 04:05.51 [info     ] BCQ_20251206031126: epoch=91 step=91000 epoch=91 metrics={'time_sample_batch': 0.002366077184677124, 'time_algorithm_update': 0.02629174494743347, 'vae_loss': 0.00558559753629379, 'critic_loss': 302.8794390411377, 'actor_loss': -93.97077111053467, 'time_step': 0.0289100501537323, 'td_error': 130.9264032666905, 'value_scale': 96.23740477617659, 'discounted_advantage': -84.05472239163167, 'initial_state': 98.36064147949219, 'diff_eval': 268.11600678671135} step=91000
2025-12-06 04:05.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_91000.d3


Epoch 92/200: 100%|██████████| 1000/1000 [00:28<00:00, 35.09it/s, vae_loss=0.00553, critic_loss=328, actor_loss=-98.8]


2025-12-06 04:06.25 [info     ] BCQ_20251206031126: epoch=92 step=92000 epoch=92 metrics={'time_sample_batch': 0.002099652051925659, 'time_algorithm_update': 0.02560524582862854, 'vae_loss': 0.005531311623519286, 'critic_loss': 327.9781423950195, 'actor_loss': -98.77897348022461, 'time_step': 0.02795862054824829, 'td_error': 160.43509439495935, 'value_scale': 97.51121967393914, 'discounted_advantage': -90.25828476730835, 'initial_state': 105.9390640258789, 'diff_eval': 254.6502100927645} step=92000
2025-12-06 04:06.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_92000.d3


Epoch 93/200: 100%|██████████| 1000/1000 [00:29<00:00, 33.94it/s, vae_loss=0.00533, critic_loss=363, actor_loss=-103]


2025-12-06 04:07.00 [info     ] BCQ_20251206031126: epoch=93 step=93000 epoch=93 metrics={'time_sample_batch': 0.002100221633911133, 'time_algorithm_update': 0.02657059669494629, 'vae_loss': 0.00532709307433106, 'critic_loss': 362.57550898742676, 'actor_loss': -103.4757275543213, 'time_step': 0.028922691583633423, 'td_error': 145.21979315149824, 'value_scale': 107.20662837672263, 'discounted_advantage': -128.4754859579206, 'initial_state': 113.08465576171875, 'diff_eval': 304.51268834446057} step=93000
2025-12-06 04:07.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_93000.d3


Epoch 94/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.08it/s, vae_loss=0.00544, critic_loss=389, actor_loss=-109]


2025-12-06 04:07.35 [info     ] BCQ_20251206031126: epoch=94 step=94000 epoch=94 metrics={'time_sample_batch': 0.0022050518989562987, 'time_algorithm_update': 0.026308572053909303, 'vae_loss': 0.005426564523950219, 'critic_loss': 389.2542600402832, 'actor_loss': -108.95674195098877, 'time_step': 0.028785978555679322, 'td_error': 127.75831417290031, 'value_scale': 110.33598660625836, 'discounted_advantage': -107.11898282797105, 'initial_state': 107.264892578125, 'diff_eval': 250.59425053165597} step=94000
2025-12-06 04:07.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_94000.d3


Epoch 95/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.54it/s, vae_loss=0.00539, critic_loss=423, actor_loss=-115]


2025-12-06 04:08.09 [info     ] BCQ_20251206031126: epoch=95 step=95000 epoch=95 metrics={'time_sample_batch': 0.0020936744213104247, 'time_algorithm_update': 0.02567327094078064, 'vae_loss': 0.005373899328056723, 'critic_loss': 422.1200255889893, 'actor_loss': -114.65457502746582, 'time_step': 0.028040984153747558, 'td_error': 163.58965163130011, 'value_scale': 116.01747238346238, 'discounted_advantage': -146.7225246623253, 'initial_state': 122.94210815429688, 'diff_eval': 370.3513293518034} step=95000
2025-12-06 04:08.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_95000.d3


Epoch 96/200: 100%|██████████| 1000/1000 [00:28<00:00, 35.03it/s, vae_loss=0.00538, critic_loss=465, actor_loss=-121]


2025-12-06 04:08.43 [info     ] BCQ_20251206031126: epoch=96 step=96000 epoch=96 metrics={'time_sample_batch': 0.0020995895862579344, 'time_algorithm_update': 0.025659992456436156, 'vae_loss': 0.00537332628108561, 'critic_loss': 464.1905693359375, 'actor_loss': -121.39466747283936, 'time_step': 0.028012328147888184, 'td_error': 176.55960941503074, 'value_scale': 120.90009573486705, 'discounted_advantage': -118.37218012175857, 'initial_state': 134.84425354003906, 'diff_eval': 245.07768956589723} step=96000
2025-12-06 04:08.44 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_96000.d3


Epoch 97/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.48it/s, vae_loss=0.00544, critic_loss=501, actor_loss=-128]


2025-12-06 04:09.18 [info     ] BCQ_20251206031126: epoch=97 step=97000 epoch=97 metrics={'time_sample_batch': 0.00215410852432251, 'time_algorithm_update': 0.02601529574394226, 'vae_loss': 0.005444494500174187, 'critic_loss': 501.653645614624, 'actor_loss': -127.66229487609863, 'time_step': 0.028441967725753785, 'td_error': 169.73172277827018, 'value_scale': 131.60754996608108, 'discounted_advantage': -152.0038263396118, 'initial_state': 149.70285034179688, 'diff_eval': 311.61049683989955} step=97000
2025-12-06 04:09.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_97000.d3


Epoch 98/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.62it/s, vae_loss=0.00548, critic_loss=553, actor_loss=-134]


2025-12-06 04:09.52 [info     ] BCQ_20251206031126: epoch=98 step=98000 epoch=98 metrics={'time_sample_batch': 0.0021418328285217284, 'time_algorithm_update': 0.025922594308853148, 'vae_loss': 0.005479736746288836, 'critic_loss': 553.200552734375, 'actor_loss': -134.3328775100708, 'time_step': 0.02834443140029907, 'td_error': 212.66026790465943, 'value_scale': 131.7492150382544, 'discounted_advantage': -136.26987249082455, 'initial_state': 151.95957946777344, 'diff_eval': 353.76012191553286} step=98000
2025-12-06 04:09.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_98000.d3


Epoch 99/200: 100%|██████████| 1000/1000 [00:28<00:00, 35.00it/s, vae_loss=0.00515, critic_loss=593, actor_loss=-140]


2025-12-06 04:10.26 [info     ] BCQ_20251206031126: epoch=99 step=99000 epoch=99 metrics={'time_sample_batch': 0.0021095128059387207, 'time_algorithm_update': 0.025670578718185424, 'vae_loss': 0.005160072403028607, 'critic_loss': 594.8736199493408, 'actor_loss': -140.1845411376953, 'time_step': 0.028041852951049804, 'td_error': 349.5948463744358, 'value_scale': 143.3555743448992, 'discounted_advantage': -218.53533377764919, 'initial_state': 165.6065673828125, 'diff_eval': 514.4675462739733} step=99000
2025-12-06 04:10.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_99000.d3


Epoch 100/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.53it/s, vae_loss=0.00522, critic_loss=649, actor_loss=-147]


2025-12-06 04:11.01 [info     ] BCQ_20251206031126: epoch=100 step=100000 epoch=100 metrics={'time_sample_batch': 0.0021016275882720947, 'time_algorithm_update': 0.026034653425216676, 'vae_loss': 0.005222859703935683, 'critic_loss': 649.7794290161132, 'actor_loss': -147.3896628112793, 'time_step': 0.02841512894630432, 'td_error': 304.8367165524619, 'value_scale': 141.909511378355, 'discounted_advantage': -173.7604740230897, 'initial_state': 155.1868896484375, 'diff_eval': 351.81282260018213} step=100000
2025-12-06 04:11.01 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_100000.d3


Epoch 101/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.66it/s, vae_loss=0.00531, critic_loss=742, actor_loss=-153]


2025-12-06 04:11.35 [info     ] BCQ_20251206031126: epoch=101 step=101000 epoch=101 metrics={'time_sample_batch': 0.0021329782009124757, 'time_algorithm_update': 0.025916072607040406, 'vae_loss': 0.005311389269772917, 'critic_loss': 742.4268081054687, 'actor_loss': -153.50593130493164, 'time_step': 0.028313473224639892, 'td_error': 227.346955921901, 'value_scale': 155.58972419326233, 'discounted_advantage': -176.8603044451847, 'initial_state': 160.3063507080078, 'diff_eval': 312.01900388970563} step=101000
2025-12-06 04:11.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_101000.d3


Epoch 102/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.43it/s, vae_loss=0.00531, critic_loss=769, actor_loss=-161]


2025-12-06 04:12.09 [info     ] BCQ_20251206031126: epoch=102 step=102000 epoch=102 metrics={'time_sample_batch': 0.002154508352279663, 'time_algorithm_update': 0.026105008363723755, 'vae_loss': 0.005313655955716968, 'critic_loss': 769.9772044067383, 'actor_loss': -160.58045407104493, 'time_step': 0.028509092092514038, 'td_error': 245.84629061549634, 'value_scale': 165.2235034272537, 'discounted_advantage': -154.4289305944392, 'initial_state': 177.96331787109375, 'diff_eval': 230.49966136295913} step=102000
2025-12-06 04:12.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_102000.d3


Epoch 103/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.61it/s, vae_loss=0.00522, critic_loss=869, actor_loss=-169]


2025-12-06 04:12.44 [info     ] BCQ_20251206031126: epoch=103 step=103000 epoch=103 metrics={'time_sample_batch': 0.0021420831680297853, 'time_algorithm_update': 0.025952404260635376, 'vae_loss': 0.005219950614497065, 'critic_loss': 868.8892671203613, 'actor_loss': -168.7797614440918, 'time_step': 0.028354727029800415, 'td_error': 326.3600372369931, 'value_scale': 162.58047167371024, 'discounted_advantage': -226.88136214008657, 'initial_state': 179.1006317138672, 'diff_eval': 352.89639238339106} step=103000
2025-12-06 04:12.44 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_103000.d3


Epoch 104/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.94it/s, vae_loss=0.00522, critic_loss=976, actor_loss=-178]   


2025-12-06 04:13.18 [info     ] BCQ_20251206031126: epoch=104 step=104000 epoch=104 metrics={'time_sample_batch': 0.0020932257175445557, 'time_algorithm_update': 0.02574510622024536, 'vae_loss': 0.0052209156639873985, 'critic_loss': 976.2027384033203, 'actor_loss': -177.62423403930663, 'time_step': 0.028085491180419923, 'td_error': 417.63899030458515, 'value_scale': 172.50145701071042, 'discounted_advantage': -160.74402249618174, 'initial_state': 184.50711059570312, 'diff_eval': 247.4207003586547} step=104000
2025-12-06 04:13.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_104000.d3


Epoch 105/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.01it/s, vae_loss=0.00519, critic_loss=1.05e+3, actor_loss=-186]


2025-12-06 04:13.53 [info     ] BCQ_20251206031126: epoch=105 step=105000 epoch=105 metrics={'time_sample_batch': 0.0022081105709075926, 'time_algorithm_update': 0.026397652864456177, 'vae_loss': 0.005195251069962979, 'critic_loss': 1052.4331911315917, 'actor_loss': -186.02979832458496, 'time_step': 0.0288815279006958, 'td_error': 387.1804300907889, 'value_scale': 186.49725186894744, 'discounted_advantage': -153.7369263644769, 'initial_state': 205.9110107421875, 'diff_eval': 250.26983421826444} step=105000
2025-12-06 04:13.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_105000.d3


Epoch 106/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.87it/s, vae_loss=0.00522, critic_loss=1.2e+3, actor_loss=-195]


2025-12-06 04:14.27 [info     ] BCQ_20251206031126: epoch=106 step=106000 epoch=106 metrics={'time_sample_batch': 0.0021125020980834962, 'time_algorithm_update': 0.02573884105682373, 'vae_loss': 0.005211325598182157, 'critic_loss': 1200.726805786133, 'actor_loss': -195.04918521118165, 'time_step': 0.028117459535598754, 'td_error': 451.9406162699522, 'value_scale': 179.9661527608661, 'discounted_advantage': -157.8936014960931, 'initial_state': 195.36749267578125, 'diff_eval': 236.5250698220345} step=106000
2025-12-06 04:14.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_106000.d3


Epoch 107/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.66it/s, vae_loss=0.00505, critic_loss=1.27e+3, actor_loss=-205]


2025-12-06 04:15.01 [info     ] BCQ_20251206031126: epoch=107 step=107000 epoch=107 metrics={'time_sample_batch': 0.002149986505508423, 'time_algorithm_update': 0.025898743391036986, 'vae_loss': 0.005051198252709582, 'critic_loss': 1264.8735043334962, 'actor_loss': -204.77283045959473, 'time_step': 0.028311695098876953, 'td_error': 444.91392431765865, 'value_scale': 200.81965558856652, 'discounted_advantage': -176.3953151455804, 'initial_state': 217.3187713623047, 'diff_eval': 228.0099855315576} step=107000
2025-12-06 04:15.01 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_107000.d3


Epoch 108/200: 100%|██████████| 1000/1000 [00:29<00:00, 33.38it/s, vae_loss=0.00499, critic_loss=1.41e+3, actor_loss=-216]


2025-12-06 04:15.36 [info     ] BCQ_20251206031126: epoch=108 step=108000 epoch=108 metrics={'time_sample_batch': 0.0020829930305480957, 'time_algorithm_update': 0.02708928728103638, 'vae_loss': 0.004977441453374923, 'critic_loss': 1409.012689025879, 'actor_loss': -215.79399282836914, 'time_step': 0.02943504023551941, 'td_error': 487.58430768924626, 'value_scale': 208.00178557748018, 'discounted_advantage': -186.94677831265022, 'initial_state': 225.26824951171875, 'diff_eval': 226.0248142679738} step=108000
2025-12-06 04:15.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_108000.d3


Epoch 109/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.85it/s, vae_loss=0.00511, critic_loss=1.59e+3, actor_loss=-227]


2025-12-06 04:16.11 [info     ] BCQ_20251206031126: epoch=109 step=109000 epoch=109 metrics={'time_sample_batch': 0.0020919606685638427, 'time_algorithm_update': 0.025816902637481688, 'vae_loss': 0.005100046904059127, 'critic_loss': 1587.993385925293, 'actor_loss': -226.63515719604493, 'time_step': 0.028157777309417726, 'td_error': 588.9425410179614, 'value_scale': 212.1460959411227, 'discounted_advantage': -191.74578507045123, 'initial_state': 244.50013732910156, 'diff_eval': 216.23041752862883} step=109000
2025-12-06 04:16.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_109000.d3


Epoch 110/200: 100%|██████████| 1000/1000 [00:28<00:00, 35.20it/s, vae_loss=0.00502, critic_loss=1.76e+3, actor_loss=-237]


2025-12-06 04:16.44 [info     ] BCQ_20251206031126: epoch=110 step=110000 epoch=110 metrics={'time_sample_batch': 0.002101243734359741, 'time_algorithm_update': 0.02550559186935425, 'vae_loss': 0.0050268437059130516, 'critic_loss': 1757.1715200195313, 'actor_loss': -237.37327770996095, 'time_step': 0.02787334942817688, 'td_error': 735.1580194838359, 'value_scale': 226.03347760418933, 'discounted_advantage': -223.17946160416687, 'initial_state': 245.3427734375, 'diff_eval': 294.45397706328885} step=110000
2025-12-06 04:16.44 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_110000.d3


Epoch 111/200: 100%|██████████| 1000/1000 [00:28<00:00, 35.07it/s, vae_loss=0.00497, critic_loss=1.83e+3, actor_loss=-249]


2025-12-06 04:17.18 [info     ] BCQ_20251206031126: epoch=111 step=111000 epoch=111 metrics={'time_sample_batch': 0.002088989496231079, 'time_algorithm_update': 0.02566216778755188, 'vae_loss': 0.004966451567830518, 'critic_loss': 1832.4217821044922, 'actor_loss': -249.47161672973633, 'time_step': 0.028004919290542604, 'td_error': 654.3399661933299, 'value_scale': 234.78722689874715, 'discounted_advantage': -275.4801639780334, 'initial_state': 258.9863586425781, 'diff_eval': 311.4081190898896} step=111000
2025-12-06 04:17.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_111000.d3


Epoch 112/200: 100%|██████████| 1000/1000 [00:28<00:00, 35.10it/s, vae_loss=0.00498, critic_loss=2.09e+3, actor_loss=-261]


2025-12-06 04:17.52 [info     ] BCQ_20251206031126: epoch=112 step=112000 epoch=112 metrics={'time_sample_batch': 0.0020797717571258544, 'time_algorithm_update': 0.025638059854507445, 'vae_loss': 0.00497209532209672, 'critic_loss': 2085.4604595947267, 'actor_loss': -261.5738656768799, 'time_step': 0.027969033002853392, 'td_error': 643.4969250684738, 'value_scale': 250.48515833420905, 'discounted_advantage': -269.8920545736352, 'initial_state': 275.74053955078125, 'diff_eval': 246.54701819518672} step=112000
2025-12-06 04:17.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_112000.d3


Epoch 113/200: 100%|██████████| 1000/1000 [00:29<00:00, 33.81it/s, vae_loss=0.0049, critic_loss=2.25e+3, actor_loss=-275]


2025-12-06 04:18.27 [info     ] BCQ_20251206031126: epoch=113 step=113000 epoch=113 metrics={'time_sample_batch': 0.0021778109073638916, 'time_algorithm_update': 0.02658732008934021, 'vae_loss': 0.004896998053882271, 'critic_loss': 2246.6764919433595, 'actor_loss': -274.9945295562744, 'time_step': 0.029050243616104126, 'td_error': 836.2580628601212, 'value_scale': 270.47723864429855, 'discounted_advantage': -376.6283854279066, 'initial_state': 286.4168701171875, 'diff_eval': 354.5394020625589} step=113000
2025-12-06 04:18.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_113000.d3


Epoch 114/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.42it/s, vae_loss=0.0048, critic_loss=2.43e+3, actor_loss=-289]


2025-12-06 04:19.02 [info     ] BCQ_20251206031126: epoch=114 step=114000 epoch=114 metrics={'time_sample_batch': 0.002110703945159912, 'time_algorithm_update': 0.02615335488319397, 'vae_loss': 0.004792722628684715, 'critic_loss': 2430.1448479003907, 'actor_loss': -288.91242985534666, 'time_step': 0.028525432348251344, 'td_error': 764.0326808311518, 'value_scale': 269.4226552531785, 'discounted_advantage': -286.2869358766168, 'initial_state': 299.353759765625, 'diff_eval': 242.23502858363966} step=114000
2025-12-06 04:19.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_114000.d3


Epoch 115/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.78it/s, vae_loss=0.00491, critic_loss=2.8e+3, actor_loss=-305]


2025-12-06 04:19.36 [info     ] BCQ_20251206031126: epoch=115 step=115000 epoch=115 metrics={'time_sample_batch': 0.0021025149822235106, 'time_algorithm_update': 0.025885468244552613, 'vae_loss': 0.004923514884430915, 'critic_loss': 2795.4894343261717, 'actor_loss': -305.12049299621583, 'time_step': 0.028226234674453735, 'td_error': 912.866855334444, 'value_scale': 289.8070137029372, 'discounted_advantage': -296.8079670808929, 'initial_state': 304.9803771972656, 'diff_eval': 301.98344341645935} step=115000
2025-12-06 04:19.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_115000.d3


Epoch 116/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.60it/s, vae_loss=0.0048, critic_loss=2.89e+3, actor_loss=-317]


2025-12-06 04:20.10 [info     ] BCQ_20251206031126: epoch=116 step=116000 epoch=116 metrics={'time_sample_batch': 0.0021323676109313966, 'time_algorithm_update': 0.025953210830688477, 'vae_loss': 0.004803689830238, 'critic_loss': 2883.1350686035157, 'actor_loss': -317.4607592315674, 'time_step': 0.02834279441833496, 'td_error': 900.2766821290677, 'value_scale': 300.9324648374534, 'discounted_advantage': -346.99601868488867, 'initial_state': 332.4912414550781, 'diff_eval': 292.63673899601287} step=116000
2025-12-06 04:20.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_116000.d3


Epoch 117/200: 100%|██████████| 1000/1000 [00:28<00:00, 35.09it/s, vae_loss=0.00492, critic_loss=3.22e+3, actor_loss=-331]


2025-12-06 04:20.44 [info     ] BCQ_20251206031126: epoch=117 step=117000 epoch=117 metrics={'time_sample_batch': 0.0021005754470825194, 'time_algorithm_update': 0.025631498336791993, 'vae_loss': 0.0049143152757314965, 'critic_loss': 3223.7039191894532, 'actor_loss': -331.08244958496095, 'time_step': 0.027975691080093384, 'td_error': 1205.746532213445, 'value_scale': 308.7757367766013, 'discounted_advantage': -282.6326005567134, 'initial_state': 340.6844482421875, 'diff_eval': 218.38326715878213} step=117000
2025-12-06 04:20.44 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_117000.d3


Epoch 118/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.53it/s, vae_loss=0.00492, critic_loss=3.42e+3, actor_loss=-344]


2025-12-06 04:21.18 [info     ] BCQ_20251206031126: epoch=118 step=118000 epoch=118 metrics={'time_sample_batch': 0.00217459774017334, 'time_algorithm_update': 0.02602554702758789, 'vae_loss': 0.004921494434354827, 'critic_loss': 3420.983530517578, 'actor_loss': -343.70127728271484, 'time_step': 0.028454065322875977, 'td_error': 1297.3512287828817, 'value_scale': 309.2691006752891, 'discounted_advantage': -314.63789207641764, 'initial_state': 337.2056884765625, 'diff_eval': 243.511009569407} step=118000
2025-12-06 04:21.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_118000.d3


Epoch 119/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.85it/s, vae_loss=0.00465, critic_loss=3.54e+3, actor_loss=-357]


2025-12-06 04:21.52 [info     ] BCQ_20251206031126: epoch=119 step=119000 epoch=119 metrics={'time_sample_batch': 0.002113507270812988, 'time_algorithm_update': 0.025740312814712524, 'vae_loss': 0.004651564753148704, 'critic_loss': 3531.85315625, 'actor_loss': -357.2097361450195, 'time_step': 0.028132776737213136, 'td_error': 1097.5739289251346, 'value_scale': 343.30029479865755, 'discounted_advantage': -418.0279264343256, 'initial_state': 383.9921569824219, 'diff_eval': 306.25194111576} step=119000
2025-12-06 04:21.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_119000.d3


Epoch 120/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.68it/s, vae_loss=0.0048, critic_loss=3.86e+3, actor_loss=-371]


2025-12-06 04:22.26 [info     ] BCQ_20251206031126: epoch=120 step=120000 epoch=120 metrics={'time_sample_batch': 0.002124830007553101, 'time_algorithm_update': 0.025909075260162354, 'vae_loss': 0.0047988952497253195, 'critic_loss': 3874.2715657958984, 'actor_loss': -370.9569158630371, 'time_step': 0.028300004482269287, 'td_error': 1662.418404470824, 'value_scale': 339.5287916865153, 'discounted_advantage': -388.6290877458333, 'initial_state': 369.60687255859375, 'diff_eval': 260.90196532827696} step=120000
2025-12-06 04:22.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_120000.d3


Epoch 121/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.24it/s, vae_loss=0.00477, critic_loss=4.2e+3, actor_loss=-385]


2025-12-06 04:23.01 [info     ] BCQ_20251206031126: epoch=121 step=121000 epoch=121 metrics={'time_sample_batch': 0.0021978585720062254, 'time_algorithm_update': 0.026224973201751708, 'vae_loss': 0.004771784459473565, 'critic_loss': 4195.279716186524, 'actor_loss': -384.89156408691406, 'time_step': 0.028679906368255615, 'td_error': 2011.1945172440767, 'value_scale': 357.5203459349532, 'discounted_advantage': -314.340107478, 'initial_state': 407.023681640625, 'diff_eval': 226.542862202122} step=121000
2025-12-06 04:23.01 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_121000.d3


Epoch 122/200: 100%|██████████| 1000/1000 [00:28<00:00, 35.36it/s, vae_loss=0.00475, critic_loss=4.55e+3, actor_loss=-399]


2025-12-06 04:23.35 [info     ] BCQ_20251206031126: epoch=122 step=122000 epoch=122 metrics={'time_sample_batch': 0.0020713438987731933, 'time_algorithm_update': 0.025437420606613158, 'vae_loss': 0.004748226786730811, 'critic_loss': 4542.946066894531, 'actor_loss': -399.1526271057129, 'time_step': 0.027759092569351197, 'td_error': 1306.3444229580828, 'value_scale': 372.17473970125025, 'discounted_advantage': -403.58928129290257, 'initial_state': 401.0126037597656, 'diff_eval': 282.5932539595447} step=122000
2025-12-06 04:23.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_122000.d3


Epoch 123/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.32it/s, vae_loss=0.00454, critic_loss=4.66e+3, actor_loss=-412]


2025-12-06 04:24.09 [info     ] BCQ_20251206031126: epoch=123 step=123000 epoch=123 metrics={'time_sample_batch': 0.002178946018218994, 'time_algorithm_update': 0.026154230356216432, 'vae_loss': 0.004555461532436311, 'critic_loss': 4682.476133544922, 'actor_loss': -411.862499420166, 'time_step': 0.02858978533744812, 'td_error': 1501.052846091354, 'value_scale': 373.4693511972783, 'discounted_advantage': -359.443956103316, 'initial_state': 437.86077880859375, 'diff_eval': 233.75593624507175} step=123000
2025-12-06 04:24.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_123000.d3


Epoch 124/200: 100%|██████████| 1000/1000 [00:29<00:00, 33.53it/s, vae_loss=0.00462, critic_loss=5.12e+3, actor_loss=-425]


2025-12-06 04:24.46 [info     ] BCQ_20251206031126: epoch=124 step=124000 epoch=124 metrics={'time_sample_batch': 0.0021226749420166016, 'time_algorithm_update': 0.026824738264083863, 'vae_loss': 0.004616854186868295, 'critic_loss': 5129.096802490234, 'actor_loss': -424.57574978637695, 'time_step': 0.029237027645111085, 'td_error': 2871.8804562509017, 'value_scale': 382.61630874377585, 'discounted_advantage': -285.9345725568009, 'initial_state': 454.7299499511719, 'diff_eval': 235.7381412830869} step=124000
2025-12-06 04:24.46 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_124000.d3


Epoch 125/200: 100%|██████████| 1000/1000 [00:30<00:00, 32.91it/s, vae_loss=0.00467, critic_loss=5.24e+3, actor_loss=-439]


2025-12-06 04:25.22 [info     ] BCQ_20251206031126: epoch=125 step=125000 epoch=125 metrics={'time_sample_batch': 0.0021302204132080077, 'time_algorithm_update': 0.02746579384803772, 'vae_loss': 0.004672266681678593, 'critic_loss': 5250.214217529297, 'actor_loss': -438.95393618774415, 'time_step': 0.029852301120758058, 'td_error': 1610.181590241727, 'value_scale': 414.4756904658608, 'discounted_advantage': -477.5840481660676, 'initial_state': 446.52081298828125, 'diff_eval': 259.1617796450102} step=125000
2025-12-06 04:25.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_125000.d3


Epoch 126/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.84it/s, vae_loss=0.00464, critic_loss=5.6e+3, actor_loss=-454]


2025-12-06 04:25.56 [info     ] BCQ_20251206031126: epoch=126 step=126000 epoch=126 metrics={'time_sample_batch': 0.0021115431785583495, 'time_algorithm_update': 0.02578399157524109, 'vae_loss': 0.004641705329995602, 'critic_loss': 5602.857580322266, 'actor_loss': -454.28154190063475, 'time_step': 0.028167627096176148, 'td_error': 1615.256575569209, 'value_scale': 439.5869388679247, 'discounted_advantage': -466.56834636387305, 'initial_state': 485.259765625, 'diff_eval': 252.27001606196674} step=126000
2025-12-06 04:25.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_126000.d3


Epoch 127/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.52it/s, vae_loss=0.00463, critic_loss=5.93e+3, actor_loss=-466]


2025-12-06 04:26.30 [info     ] BCQ_20251206031126: epoch=127 step=127000 epoch=127 metrics={'time_sample_batch': 0.0021395113468170167, 'time_algorithm_update': 0.026018385410308837, 'vae_loss': 0.004637410236056894, 'critic_loss': 5927.962125366211, 'actor_loss': -465.58631814575193, 'time_step': 0.028422176361083986, 'td_error': 1897.1281404034917, 'value_scale': 449.10643711473796, 'discounted_advantage': -541.8858719358841, 'initial_state': 505.855712890625, 'diff_eval': 364.04122128221877} step=127000
2025-12-06 04:26.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_127000.d3


Epoch 128/200: 100%|██████████| 1000/1000 [00:29<00:00, 33.85it/s, vae_loss=0.00465, critic_loss=6.07e+3, actor_loss=-476]


2025-12-06 04:27.05 [info     ] BCQ_20251206031126: epoch=128 step=128000 epoch=128 metrics={'time_sample_batch': 0.0020719192028045654, 'time_algorithm_update': 0.026656601428985596, 'vae_loss': 0.004647903557866812, 'critic_loss': 6077.502421142578, 'actor_loss': -475.93613150024413, 'time_step': 0.028998984098434447, 'td_error': 1916.9085712396538, 'value_scale': 427.6995652244557, 'discounted_advantage': -368.81538132442796, 'initial_state': 484.18609619140625, 'diff_eval': 212.37729954474622} step=128000
2025-12-06 04:27.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_128000.d3


Epoch 129/200: 100%|██████████| 1000/1000 [00:28<00:00, 35.00it/s, vae_loss=0.00457, critic_loss=6.46e+3, actor_loss=-488]


2025-12-06 04:27.39 [info     ] BCQ_20251206031126: epoch=129 step=129000 epoch=129 metrics={'time_sample_batch': 0.0020987784862518313, 'time_algorithm_update': 0.0256922881603241, 'vae_loss': 0.004561483651865273, 'critic_loss': 6446.687508544922, 'actor_loss': -488.13586254882813, 'time_step': 0.028043575763702394, 'td_error': 1911.5620155034885, 'value_scale': 449.91414196673105, 'discounted_advantage': -503.54519250911676, 'initial_state': 489.4329833984375, 'diff_eval': 260.0958380959186} step=129000
2025-12-06 04:27.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_129000.d3


Epoch 130/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.18it/s, vae_loss=0.00463, critic_loss=6.57e+3, actor_loss=-497]


2025-12-06 04:28.14 [info     ] BCQ_20251206031126: epoch=130 step=130000 epoch=130 metrics={'time_sample_batch': 0.002158229112625122, 'time_algorithm_update': 0.026292185068130493, 'vae_loss': 0.0046363830927293745, 'critic_loss': 6579.423499267578, 'actor_loss': -497.22351708984377, 'time_step': 0.028708312034606935, 'td_error': 4390.698905584428, 'value_scale': 453.6957242312995, 'discounted_advantage': -351.4485197539096, 'initial_state': 533.2313842773438, 'diff_eval': 273.8613207215569} step=130000
2025-12-06 04:28.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_130000.d3


Epoch 131/200: 100%|██████████| 1000/1000 [00:31<00:00, 31.95it/s, vae_loss=0.00445, critic_loss=6.83e+3, actor_loss=-507]


2025-12-06 04:28.51 [info     ] BCQ_20251206031126: epoch=131 step=131000 epoch=131 metrics={'time_sample_batch': 0.002291985273361206, 'time_algorithm_update': 0.028156147480010985, 'vae_loss': 0.00445784590463154, 'critic_loss': 6830.06446484375, 'actor_loss': -507.26000103759765, 'time_step': 0.030724271059036256, 'td_error': 2473.460492362639, 'value_scale': 469.971425350234, 'discounted_advantage': -416.8219735806256, 'initial_state': 506.81280517578125, 'diff_eval': 281.08727245559123} step=131000
2025-12-06 04:28.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_131000.d3


Epoch 132/200: 100%|██████████| 1000/1000 [00:31<00:00, 32.23it/s, vae_loss=0.00452, critic_loss=7.19e+3, actor_loss=-518]


2025-12-06 04:29.27 [info     ] BCQ_20251206031126: epoch=132 step=132000 epoch=132 metrics={'time_sample_batch': 0.002353795289993286, 'time_algorithm_update': 0.02776037549972534, 'vae_loss': 0.004521379290497862, 'critic_loss': 7193.003661132812, 'actor_loss': -518.5510888671874, 'time_step': 0.03041852355003357, 'td_error': 1834.0292358452243, 'value_scale': 494.24356007236247, 'discounted_advantage': -455.4509539037146, 'initial_state': 556.481201171875, 'diff_eval': 240.11322411933244} step=132000
2025-12-06 04:29.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_132000.d3


Epoch 133/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.51it/s, vae_loss=0.00454, critic_loss=7.72e+3, actor_loss=-526]


2025-12-06 04:30.02 [info     ] BCQ_20251206031126: epoch=133 step=133000 epoch=133 metrics={'time_sample_batch': 0.0021108109951019287, 'time_algorithm_update': 0.026088242053985595, 'vae_loss': 0.004540232482133433, 'critic_loss': 7730.864098876953, 'actor_loss': -526.2993711242676, 'time_step': 0.028454752206802367, 'td_error': 2558.108624189846, 'value_scale': 492.80024987534125, 'discounted_advantage': -595.2621816151385, 'initial_state': 564.04931640625, 'diff_eval': 326.39415534007964} step=133000
2025-12-06 04:30.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_133000.d3


Epoch 134/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.91it/s, vae_loss=0.00443, critic_loss=7.62e+3, actor_loss=-537]


2025-12-06 04:30.36 [info     ] BCQ_20251206031126: epoch=134 step=134000 epoch=134 metrics={'time_sample_batch': 0.0021001646518707274, 'time_algorithm_update': 0.025803205728530884, 'vae_loss': 0.004433419734705239, 'critic_loss': 7630.827877929688, 'actor_loss': -536.4185342407227, 'time_step': 0.02813987374305725, 'td_error': 1816.8673092254285, 'value_scale': 510.7107586907621, 'discounted_advantage': -551.2789513188757, 'initial_state': 572.56396484375, 'diff_eval': 302.3371806239078} step=134000
2025-12-06 04:30.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_134000.d3


Epoch 135/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.56it/s, vae_loss=0.00447, critic_loss=7.96e+3, actor_loss=-549]


2025-12-06 04:31.10 [info     ] BCQ_20251206031126: epoch=135 step=135000 epoch=135 metrics={'time_sample_batch': 0.002172748804092407, 'time_algorithm_update': 0.025999657392501832, 'vae_loss': 0.00447340648365207, 'critic_loss': 7969.679234863282, 'actor_loss': -548.8112033996582, 'time_step': 0.028427651166915893, 'td_error': 2223.403798738047, 'value_scale': 507.1583368486451, 'discounted_advantage': -578.1812724893787, 'initial_state': 565.5341796875, 'diff_eval': 323.19361597239975} step=135000
2025-12-06 04:31.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_135000.d3


Epoch 136/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.21it/s, vae_loss=0.00457, critic_loss=8.29e+3, actor_loss=-552]


2025-12-06 04:31.45 [info     ] BCQ_20251206031126: epoch=136 step=136000 epoch=136 metrics={'time_sample_batch': 0.002207571029663086, 'time_algorithm_update': 0.026215703725814818, 'vae_loss': 0.004567413329146802, 'critic_loss': 8280.377393310548, 'actor_loss': -552.3151379394532, 'time_step': 0.0286941020488739, 'td_error': 3520.8699549570965, 'value_scale': 514.7726002662549, 'discounted_advantage': -439.30314488374944, 'initial_state': 589.5526733398438, 'diff_eval': 205.8834380834341} step=136000
2025-12-06 04:31.45 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_136000.d3


Epoch 137/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.37it/s, vae_loss=0.00445, critic_loss=8.21e+3, actor_loss=-558]


2025-12-06 04:32.20 [info     ] BCQ_20251206031126: epoch=137 step=137000 epoch=137 metrics={'time_sample_batch': 0.0021499550342559816, 'time_algorithm_update': 0.02611355137825012, 'vae_loss': 0.004463226136402227, 'critic_loss': 8210.326673583984, 'actor_loss': -557.8799591369628, 'time_step': 0.028533367395401, 'td_error': 2892.978914982211, 'value_scale': 523.5927040272167, 'discounted_advantage': -392.96712178113296, 'initial_state': 611.8942260742188, 'diff_eval': 227.30919775643272} step=137000
2025-12-06 04:32.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_137000.d3


Epoch 138/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.84it/s, vae_loss=0.00443, critic_loss=8.23e+3, actor_loss=-561]


2025-12-06 04:32.54 [info     ] BCQ_20251206031126: epoch=138 step=138000 epoch=138 metrics={'time_sample_batch': 0.0020994904041290283, 'time_algorithm_update': 0.025803300142288207, 'vae_loss': 0.004436816475586965, 'critic_loss': 8218.76040649414, 'actor_loss': -561.3847610168457, 'time_step': 0.028165384292602538, 'td_error': 2711.233434520238, 'value_scale': 535.4572415711594, 'discounted_advantage': -630.5778700972718, 'initial_state': 619.2421264648438, 'diff_eval': 335.83580947202495} step=138000
2025-12-06 04:32.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_138000.d3


Epoch 139/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.87it/s, vae_loss=0.00441, critic_loss=8.54e+3, actor_loss=-561]


2025-12-06 04:33.28 [info     ] BCQ_20251206031126: epoch=139 step=139000 epoch=139 metrics={'time_sample_batch': 0.0021400558948516846, 'time_algorithm_update': 0.02575019073486328, 'vae_loss': 0.004404496352770366, 'critic_loss': 8537.708409667968, 'actor_loss': -561.6412263488769, 'time_step': 0.028149481058120727, 'td_error': 3241.949913795297, 'value_scale': 523.5750339073687, 'discounted_advantage': -462.07481795726034, 'initial_state': 645.617431640625, 'diff_eval': 239.57854828898996} step=139000
2025-12-06 04:33.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_139000.d3


Epoch 140/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.41it/s, vae_loss=0.00438, critic_loss=8.46e+3, actor_loss=-561]


2025-12-06 04:34.02 [info     ] BCQ_20251206031126: epoch=140 step=140000 epoch=140 metrics={'time_sample_batch': 0.0021861538887023926, 'time_algorithm_update': 0.026122671604156496, 'vae_loss': 0.004382756781531498, 'critic_loss': 8465.444328857422, 'actor_loss': -560.5581335449219, 'time_step': 0.028549878120422364, 'td_error': 2104.149933023358, 'value_scale': 508.46100291522333, 'discounted_advantage': -470.9956853422383, 'initial_state': 594.2001342773438, 'diff_eval': 232.8305051525497} step=140000
2025-12-06 04:34.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_140000.d3


Epoch 141/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.47it/s, vae_loss=0.00438, critic_loss=8.44e+3, actor_loss=-557]


2025-12-06 04:34.37 [info     ] BCQ_20251206031126: epoch=141 step=141000 epoch=141 metrics={'time_sample_batch': 0.002049345254898071, 'time_algorithm_update': 0.026182013988494873, 'vae_loss': 0.004372495247749611, 'critic_loss': 8430.757078125, 'actor_loss': -557.265327697754, 'time_step': 0.028489571571350097, 'td_error': 2470.5083806780935, 'value_scale': 511.7345193289751, 'discounted_advantage': -595.8105123659163, 'initial_state': 580.3770141601562, 'diff_eval': 277.5600725071084} step=141000
2025-12-06 04:34.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_141000.d3


Epoch 142/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.13it/s, vae_loss=0.00433, critic_loss=8.3e+3, actor_loss=-558]


2025-12-06 04:35.11 [info     ] BCQ_20251206031126: epoch=142 step=142000 epoch=142 metrics={'time_sample_batch': 0.002155320405960083, 'time_algorithm_update': 0.026271456480026247, 'vae_loss': 0.004336420292849652, 'critic_loss': 8301.760181640626, 'actor_loss': -558.21446875, 'time_step': 0.028712249040603637, 'td_error': 4512.402651273202, 'value_scale': 510.8104190457197, 'discounted_advantage': -391.7194793812476, 'initial_state': 636.1953125, 'diff_eval': 229.5692139903347} step=142000
2025-12-06 04:35.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_142000.d3


Epoch 143/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.92it/s, vae_loss=0.00435, critic_loss=9.01e+3, actor_loss=-557]


2025-12-06 04:35.46 [info     ] BCQ_20251206031126: epoch=143 step=143000 epoch=143 metrics={'time_sample_batch': 0.0020876195430755617, 'time_algorithm_update': 0.025767192840576172, 'vae_loss': 0.004348932663095184, 'critic_loss': 9010.18524975586, 'actor_loss': -556.9948468933105, 'time_step': 0.028115538120269776, 'td_error': 2458.933072152757, 'value_scale': 519.6751999289472, 'discounted_advantage': -650.8587870439201, 'initial_state': 628.3239135742188, 'diff_eval': 340.0067787997637} step=143000
2025-12-06 04:35.46 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_143000.d3


Epoch 144/200: 100%|██████████| 1000/1000 [00:29<00:00, 33.90it/s, vae_loss=0.00426, critic_loss=8.7e+3, actor_loss=-557]


2025-12-06 04:36.20 [info     ] BCQ_20251206031126: epoch=144 step=144000 epoch=144 metrics={'time_sample_batch': 0.0020684654712677, 'time_algorithm_update': 0.02668562650680542, 'vae_loss': 0.00425445299432613, 'critic_loss': 8690.592869384765, 'actor_loss': -557.1269541015625, 'time_step': 0.029009432792663575, 'td_error': 2009.7272832332235, 'value_scale': 504.7156899805241, 'discounted_advantage': -489.0375421994141, 'initial_state': 600.67236328125, 'diff_eval': 214.03484974673663} step=144000
2025-12-06 04:36.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_144000.d3


Epoch 145/200: 100%|██████████| 1000/1000 [00:28<00:00, 35.02it/s, vae_loss=0.00436, critic_loss=9.09e+3, actor_loss=-555]


2025-12-06 04:36.55 [info     ] BCQ_20251206031126: epoch=145 step=145000 epoch=145 metrics={'time_sample_batch': 0.0021883859634399413, 'time_algorithm_update': 0.025630036354064942, 'vae_loss': 0.004362749626161531, 'critic_loss': 9100.704849121094, 'actor_loss': -554.7063479003906, 'time_step': 0.028066377639770507, 'td_error': 7942.790780541541, 'value_scale': 499.7037789955523, 'discounted_advantage': -356.65806693651353, 'initial_state': 611.1083374023438, 'diff_eval': 292.43506897046143} step=145000
2025-12-06 04:36.55 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_145000.d3


Epoch 146/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.77it/s, vae_loss=0.00437, critic_loss=8.95e+3, actor_loss=-554]


2025-12-06 04:37.29 [info     ] BCQ_20251206031126: epoch=146 step=146000 epoch=146 metrics={'time_sample_batch': 0.0021693556308746338, 'time_algorithm_update': 0.02580205273628235, 'vae_loss': 0.004361438572290354, 'critic_loss': 8945.64213720703, 'actor_loss': -553.7566086120605, 'time_step': 0.028231948137283327, 'td_error': 2192.4185402982407, 'value_scale': 513.4044737402172, 'discounted_advantage': -584.1535165498015, 'initial_state': 626.4132080078125, 'diff_eval': 289.5009041066638} step=146000
2025-12-06 04:37.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_146000.d3


Epoch 147/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.95it/s, vae_loss=0.0043, critic_loss=8.82e+3, actor_loss=-547]


2025-12-06 04:38.03 [info     ] BCQ_20251206031126: epoch=147 step=147000 epoch=147 metrics={'time_sample_batch': 0.002131810665130615, 'time_algorithm_update': 0.025736201763153076, 'vae_loss': 0.004303717592149042, 'critic_loss': 8822.533452636719, 'actor_loss': -546.7716156921387, 'time_step': 0.028115007162094117, 'td_error': 2858.1331726600997, 'value_scale': 502.1183889979389, 'discounted_advantage': -360.6298811149092, 'initial_state': 617.461669921875, 'diff_eval': 165.3863597674488} step=147000
2025-12-06 04:38.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_147000.d3


Epoch 148/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.77it/s, vae_loss=0.00415, critic_loss=8.48e+3, actor_loss=-545]


2025-12-06 04:38.38 [info     ] BCQ_20251206031126: epoch=148 step=148000 epoch=148 metrics={'time_sample_batch': 0.00210386848449707, 'time_algorithm_update': 0.025847853660583495, 'vae_loss': 0.004170684635057114, 'critic_loss': 8500.972758056641, 'actor_loss': -544.6079696655273, 'time_step': 0.028217635869979857, 'td_error': 2034.3568749029484, 'value_scale': 517.4481373121283, 'discounted_advantage': -582.8288044007476, 'initial_state': 613.1646728515625, 'diff_eval': 286.7293153017684} step=148000
2025-12-06 04:38.38 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_148000.d3


Epoch 149/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.93it/s, vae_loss=0.00416, critic_loss=8.42e+3, actor_loss=-541]


2025-12-06 04:39.12 [info     ] BCQ_20251206031126: epoch=149 step=149000 epoch=149 metrics={'time_sample_batch': 0.002104429244995117, 'time_algorithm_update': 0.02574582576751709, 'vae_loss': 0.004167547592543997, 'critic_loss': 8422.629219726563, 'actor_loss': -541.5488623962402, 'time_step': 0.028110950469970704, 'td_error': 2390.723257732362, 'value_scale': 491.2504281770853, 'discounted_advantage': -422.48511202340273, 'initial_state': 619.082763671875, 'diff_eval': 189.83704319792827} step=149000
2025-12-06 04:39.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_149000.d3


Epoch 150/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.05it/s, vae_loss=0.00415, critic_loss=8.49e+3, actor_loss=-536]


2025-12-06 04:39.47 [info     ] BCQ_20251206031126: epoch=150 step=150000 epoch=150 metrics={'time_sample_batch': 0.0021537792682647704, 'time_algorithm_update': 0.0264154953956604, 'vae_loss': 0.004150230590603314, 'critic_loss': 8489.705668945313, 'actor_loss': -536.7540930786133, 'time_step': 0.02883797526359558, 'td_error': 2558.3017792262335, 'value_scale': 494.42977938538127, 'discounted_advantage': -522.8969262557627, 'initial_state': 631.8936157226562, 'diff_eval': 243.81405498204577} step=150000
2025-12-06 04:39.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_150000.d3


Epoch 151/200: 100%|██████████| 1000/1000 [00:29<00:00, 33.47it/s, vae_loss=0.00422, critic_loss=8.59e+3, actor_loss=-534]


2025-12-06 04:40.22 [info     ] BCQ_20251206031126: epoch=151 step=151000 epoch=151 metrics={'time_sample_batch': 0.0022914738655090334, 'time_algorithm_update': 0.026741682052612306, 'vae_loss': 0.004224485112354159, 'critic_loss': 8586.173745605469, 'actor_loss': -534.304315826416, 'time_step': 0.02930758047103882, 'td_error': 1837.4183286853315, 'value_scale': 493.6295875336878, 'discounted_advantage': -471.140001845072, 'initial_state': 592.4939575195312, 'diff_eval': 248.83280891622198} step=151000
2025-12-06 04:40.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_151000.d3


Epoch 152/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.28it/s, vae_loss=0.00435, critic_loss=8.62e+3, actor_loss=-530]


2025-12-06 04:40.56 [info     ] BCQ_20251206031126: epoch=152 step=152000 epoch=152 metrics={'time_sample_batch': 0.0022087860107421874, 'time_algorithm_update': 0.026134056329727172, 'vae_loss': 0.004354651926085353, 'critic_loss': 8617.194595214844, 'actor_loss': -530.2649913635254, 'time_step': 0.028616021156311035, 'td_error': 3716.3051419458457, 'value_scale': 474.27671872780087, 'discounted_advantage': -439.56774034714095, 'initial_state': 591.6107788085938, 'diff_eval': 276.02848729547617} step=152000
2025-12-06 04:40.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_152000.d3


Epoch 153/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.82it/s, vae_loss=0.00417, critic_loss=8.63e+3, actor_loss=-525]


2025-12-06 04:41.31 [info     ] BCQ_20251206031126: epoch=153 step=153000 epoch=153 metrics={'time_sample_batch': 0.0021196513175964354, 'time_algorithm_update': 0.025747191905975342, 'vae_loss': 0.0041815911651356145, 'critic_loss': 8627.81982373047, 'actor_loss': -524.5597782287598, 'time_step': 0.02815007305145264, 'td_error': 3002.1562509563755, 'value_scale': 472.1761174260671, 'discounted_advantage': -383.0801648426828, 'initial_state': 596.4711303710938, 'diff_eval': 193.87569214631665} step=153000
2025-12-06 04:41.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_153000.d3


Epoch 154/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.92it/s, vae_loss=0.00406, critic_loss=8.08e+3, actor_loss=-519]


2025-12-06 04:42.05 [info     ] BCQ_20251206031126: epoch=154 step=154000 epoch=154 metrics={'time_sample_batch': 0.0020868208408355714, 'time_algorithm_update': 0.025753649473190306, 'vae_loss': 0.00406488428870216, 'critic_loss': 8090.702317382813, 'actor_loss': -519.0971290893555, 'time_step': 0.028110663890838623, 'td_error': 2605.663975280264, 'value_scale': 469.5791078926332, 'discounted_advantage': -426.7386828282714, 'initial_state': 581.2791748046875, 'diff_eval': 208.0735727436429} step=154000
2025-12-06 04:42.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_154000.d3


Epoch 155/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.71it/s, vae_loss=0.00415, critic_loss=8.24e+3, actor_loss=-512]


2025-12-06 04:42.39 [info     ] BCQ_20251206031126: epoch=155 step=155000 epoch=155 metrics={'time_sample_batch': 0.0021207194328308103, 'time_algorithm_update': 0.025889796257019045, 'vae_loss': 0.004149417094769888, 'critic_loss': 8249.618491699219, 'actor_loss': -512.5519548645019, 'time_step': 0.0282700514793396, 'td_error': 2321.089894117475, 'value_scale': 459.6527584787686, 'discounted_advantage': -442.781119520646, 'initial_state': 572.5536499023438, 'diff_eval': 208.63030834858833} step=155000
2025-12-06 04:42.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_155000.d3


Epoch 156/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.75it/s, vae_loss=0.00413, critic_loss=7.83e+3, actor_loss=-506]


2025-12-06 04:43.13 [info     ] BCQ_20251206031126: epoch=156 step=156000 epoch=156 metrics={'time_sample_batch': 0.0021419692039489745, 'time_algorithm_update': 0.02585534715652466, 'vae_loss': 0.00412735094584059, 'critic_loss': 7829.225914550781, 'actor_loss': -506.51589700317385, 'time_step': 0.028251455307006835, 'td_error': 2932.5539807611367, 'value_scale': 445.2495277566962, 'discounted_advantage': -329.60821704604086, 'initial_state': 571.0081787109375, 'diff_eval': 195.59207570199825} step=156000
2025-12-06 04:43.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_156000.d3


Epoch 157/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.81it/s, vae_loss=0.00412, critic_loss=7.97e+3, actor_loss=-497]


2025-12-06 04:43.48 [info     ] BCQ_20251206031126: epoch=157 step=157000 epoch=157 metrics={'time_sample_batch': 0.0020904650688171387, 'time_algorithm_update': 0.025859356164932252, 'vae_loss': 0.004119459867244587, 'critic_loss': 7972.578245361328, 'actor_loss': -497.4405118103027, 'time_step': 0.028204006433486938, 'td_error': 2320.480438018565, 'value_scale': 451.5956992997608, 'discounted_advantage': -540.1871913908509, 'initial_state': 563.767333984375, 'diff_eval': 305.6369558017283} step=157000
2025-12-06 04:43.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_157000.d3


Epoch 158/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.36it/s, vae_loss=0.00411, critic_loss=7.66e+3, actor_loss=-486]


2025-12-06 04:44.22 [info     ] BCQ_20251206031126: epoch=158 step=158000 epoch=158 metrics={'time_sample_batch': 0.002151540517807007, 'time_algorithm_update': 0.02614321208000183, 'vae_loss': 0.00410504099773243, 'critic_loss': 7643.333416992187, 'actor_loss': -485.8975020141602, 'time_step': 0.028558847188949583, 'td_error': 1685.333828640195, 'value_scale': 448.348477226191, 'discounted_advantage': -460.1793795687231, 'initial_state': 568.1416625976562, 'diff_eval': 281.1084836814406} step=158000
2025-12-06 04:44.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_158000.d3


Epoch 159/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.04it/s, vae_loss=0.00409, critic_loss=7.36e+3, actor_loss=-473]


2025-12-06 04:44.57 [info     ] BCQ_20251206031126: epoch=159 step=159000 epoch=159 metrics={'time_sample_batch': 0.0024019031524658203, 'time_algorithm_update': 0.026186224222183228, 'vae_loss': 0.004087207946227863, 'critic_loss': 7349.4551005859375, 'actor_loss': -472.4615333557129, 'time_step': 0.028850175857543944, 'td_error': 1787.505930050427, 'value_scale': 426.1767930736686, 'discounted_advantage': -394.7113951660946, 'initial_state': 540.703857421875, 'diff_eval': 210.7489008520632} step=159000
2025-12-06 04:44.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_159000.d3


Epoch 160/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.26it/s, vae_loss=0.0041, critic_loss=6.98e+3, actor_loss=-459]


2025-12-06 04:45.32 [info     ] BCQ_20251206031126: epoch=160 step=160000 epoch=160 metrics={'time_sample_batch': 0.002100508213043213, 'time_algorithm_update': 0.026305694818496705, 'vae_loss': 0.004096066269674338, 'critic_loss': 6996.888314453125, 'actor_loss': -458.9844859313965, 'time_step': 0.028674483776092528, 'td_error': 3899.5793136660664, 'value_scale': 378.23079387752426, 'discounted_advantage': -216.96034435060002, 'initial_state': 496.8450927734375, 'diff_eval': 184.45198442668243} step=160000
2025-12-06 04:45.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_160000.d3


Epoch 161/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.72it/s, vae_loss=0.00404, critic_loss=6.76e+3, actor_loss=-445]


2025-12-06 04:46.06 [info     ] BCQ_20251206031126: epoch=161 step=161000 epoch=161 metrics={'time_sample_batch': 0.0021401028633117676, 'time_algorithm_update': 0.02589759135246277, 'vae_loss': 0.004028804459492676, 'critic_loss': 6752.906380615234, 'actor_loss': -445.11967245483396, 'time_step': 0.028296449899673462, 'td_error': 1749.357228597806, 'value_scale': 398.53152432006044, 'discounted_advantage': -463.20758526339716, 'initial_state': 488.3304748535156, 'diff_eval': 288.18368067102676} step=161000
2025-12-06 04:46.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_161000.d3


Epoch 162/200: 100%|██████████| 1000/1000 [00:29<00:00, 33.74it/s, vae_loss=0.0039, critic_loss=6.29e+3, actor_loss=-427]


2025-12-06 04:46.41 [info     ] BCQ_20251206031126: epoch=162 step=162000 epoch=162 metrics={'time_sample_batch': 0.0020620815753936766, 'time_algorithm_update': 0.026809314727783203, 'vae_loss': 0.0038956453419523316, 'critic_loss': 6276.423189208985, 'actor_loss': -427.1091971740723, 'time_step': 0.02911412000656128, 'td_error': 1865.7569173327538, 'value_scale': 383.75171827090855, 'discounted_advantage': -444.1958787422174, 'initial_state': 490.3926696777344, 'diff_eval': 298.0530035562286} step=162000
2025-12-06 04:46.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_162000.d3


Epoch 163/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.77it/s, vae_loss=0.00408, critic_loss=6.23e+3, actor_loss=-413]


2025-12-06 04:47.15 [info     ] BCQ_20251206031126: epoch=163 step=163000 epoch=163 metrics={'time_sample_batch': 0.002111194610595703, 'time_algorithm_update': 0.025846468448638915, 'vae_loss': 0.004080032231868245, 'critic_loss': 6233.427062011719, 'actor_loss': -413.21891534423827, 'time_step': 0.028226814985275267, 'td_error': 1691.742947653141, 'value_scale': 365.438938156807, 'discounted_advantage': -392.1504762631887, 'initial_state': 477.62445068359375, 'diff_eval': 274.00988449693443} step=163000
2025-12-06 04:47.15 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_163000.d3


Epoch 164/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.03it/s, vae_loss=0.00404, critic_loss=5.82e+3, actor_loss=-397]


2025-12-06 04:47.50 [info     ] BCQ_20251206031126: epoch=164 step=164000 epoch=164 metrics={'time_sample_batch': 0.002169452905654907, 'time_algorithm_update': 0.0264456582069397, 'vae_loss': 0.004040074153686873, 'critic_loss': 5816.262510253906, 'actor_loss': -396.80647778320315, 'time_step': 0.028876408576965332, 'td_error': 1758.6840145652238, 'value_scale': 331.7377496157388, 'discounted_advantage': -250.29997160745225, 'initial_state': 443.3736572265625, 'diff_eval': 185.47264267450402} step=164000
2025-12-06 04:47.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_164000.d3


Epoch 165/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.83it/s, vae_loss=0.00406, critic_loss=5.58e+3, actor_loss=-381]


2025-12-06 04:48.24 [info     ] BCQ_20251206031126: epoch=165 step=165000 epoch=165 metrics={'time_sample_batch': 0.0021081817150115966, 'time_algorithm_update': 0.02580718183517456, 'vae_loss': 0.004069259125273675, 'critic_loss': 5599.783020019531, 'actor_loss': -380.7911645202637, 'time_step': 0.028183870792388915, 'td_error': 1760.1978394821074, 'value_scale': 324.1898317572107, 'discounted_advantage': -333.6334428020689, 'initial_state': 442.22723388671875, 'diff_eval': 251.98168614852122} step=165000
2025-12-06 04:48.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_165000.d3


Epoch 166/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.46it/s, vae_loss=0.00393, critic_loss=5.04e+3, actor_loss=-361]


2025-12-06 04:48.58 [info     ] BCQ_20251206031126: epoch=166 step=166000 epoch=166 metrics={'time_sample_batch': 0.002142889738082886, 'time_algorithm_update': 0.026067583322525026, 'vae_loss': 0.003923674457357265, 'critic_loss': 5037.19076953125, 'actor_loss': -361.1378273925781, 'time_step': 0.02846688508987427, 'td_error': 1741.693780861858, 'value_scale': 314.8905777568489, 'discounted_advantage': -350.76471564199693, 'initial_state': 419.1915588378906, 'diff_eval': 318.5680035996402} step=166000
2025-12-06 04:48.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_166000.d3


Epoch 167/200: 100%|██████████| 1000/1000 [00:28<00:00, 35.04it/s, vae_loss=0.00407, critic_loss=4.97e+3, actor_loss=-342]


2025-12-06 04:49.32 [info     ] BCQ_20251206031126: epoch=167 step=167000 epoch=167 metrics={'time_sample_batch': 0.0021330506801605226, 'time_algorithm_update': 0.02560882019996643, 'vae_loss': 0.004071608080528677, 'critic_loss': 4968.270481201172, 'actor_loss': -342.098610458374, 'time_step': 0.027999844789505006, 'td_error': 1405.3528361885335, 'value_scale': 294.64636176657194, 'discounted_advantage': -261.3810897419137, 'initial_state': 396.9407958984375, 'diff_eval': 198.35412990939926} step=167000
2025-12-06 04:49.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_167000.d3


Epoch 168/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.90it/s, vae_loss=0.00397, critic_loss=4.55e+3, actor_loss=-321]


2025-12-06 04:50.06 [info     ] BCQ_20251206031126: epoch=168 step=168000 epoch=168 metrics={'time_sample_batch': 0.0021118502616882325, 'time_algorithm_update': 0.025782782554626463, 'vae_loss': 0.00396901015623007, 'critic_loss': 4539.179240112305, 'actor_loss': -321.36914134216306, 'time_step': 0.028135600090026854, 'td_error': 1378.2347529610397, 'value_scale': 273.9614587935548, 'discounted_advantage': -237.64039228096323, 'initial_state': 389.5711364746094, 'diff_eval': 248.69024491749502} step=168000
2025-12-06 04:50.07 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_168000.d3


Epoch 169/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.33it/s, vae_loss=0.00401, critic_loss=4.42e+3, actor_loss=-304]


2025-12-06 04:50.41 [info     ] BCQ_20251206031126: epoch=169 step=169000 epoch=169 metrics={'time_sample_batch': 0.0021774561405181886, 'time_algorithm_update': 0.026170110940933227, 'vae_loss': 0.004011514190468006, 'critic_loss': 4427.974412231445, 'actor_loss': -303.6870497894287, 'time_step': 0.028607637166976927, 'td_error': 1437.9501905457553, 'value_scale': 248.7102566167075, 'discounted_advantage': -209.21396726874602, 'initial_state': 356.7040100097656, 'diff_eval': 251.4118943435473} step=169000
2025-12-06 04:50.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_169000.d3


Epoch 170/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.53it/s, vae_loss=0.00395, critic_loss=4.11e+3, actor_loss=-282]


2025-12-06 04:51.16 [info     ] BCQ_20251206031126: epoch=170 step=170000 epoch=170 metrics={'time_sample_batch': 0.0021308155059814454, 'time_algorithm_update': 0.026043689727783204, 'vae_loss': 0.003949413979775272, 'critic_loss': 4101.827629638672, 'actor_loss': -281.7752814331055, 'time_step': 0.028437002420425416, 'td_error': 1128.320769279536, 'value_scale': 221.98303449960787, 'discounted_advantage': -213.12018347327898, 'initial_state': 313.6334228515625, 'diff_eval': 212.32735367115785} step=170000
2025-12-06 04:51.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_170000.d3


Epoch 171/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.87it/s, vae_loss=0.00384, critic_loss=3.79e+3, actor_loss=-262]


2025-12-06 04:51.49 [info     ] BCQ_20251206031126: epoch=171 step=171000 epoch=171 metrics={'time_sample_batch': 0.0021411187648773195, 'time_algorithm_update': 0.025796682119369505, 'vae_loss': 0.0038432216295041146, 'critic_loss': 3788.27713293457, 'actor_loss': -261.80186070251466, 'time_step': 0.028181830167770386, 'td_error': 1223.438567143981, 'value_scale': 194.705123557329, 'discounted_advantage': -165.0561315367778, 'initial_state': 303.6995849609375, 'diff_eval': 187.60941090694013} step=171000
2025-12-06 04:51.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_171000.d3


Epoch 172/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.43it/s, vae_loss=0.00398, critic_loss=3.57e+3, actor_loss=-242]


2025-12-06 04:52.24 [info     ] BCQ_20251206031126: epoch=172 step=172000 epoch=172 metrics={'time_sample_batch': 0.0021639819145202635, 'time_algorithm_update': 0.026063003301620483, 'vae_loss': 0.003980400032014586, 'critic_loss': 3569.473330078125, 'actor_loss': -242.05438774108887, 'time_step': 0.028504367351531983, 'td_error': 1131.1606427893973, 'value_scale': 181.24699295488907, 'discounted_advantage': -184.6375294603039, 'initial_state': 271.76666259765625, 'diff_eval': 272.00060037451533} step=172000
2025-12-06 04:52.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_172000.d3


Epoch 173/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.53it/s, vae_loss=0.00401, critic_loss=3.33e+3, actor_loss=-219]


2025-12-06 04:52.58 [info     ] BCQ_20251206031126: epoch=173 step=173000 epoch=173 metrics={'time_sample_batch': 0.002114312410354614, 'time_algorithm_update': 0.026054205656051637, 'vae_loss': 0.0040041672139195725, 'critic_loss': 3328.424905761719, 'actor_loss': -219.2471240234375, 'time_step': 0.02842425560951233, 'td_error': 989.143275586154, 'value_scale': 164.1998838773817, 'discounted_advantage': -145.5016565999876, 'initial_state': 238.87225341796875, 'diff_eval': 250.9709457101812} step=173000
2025-12-06 04:52.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_173000.d3


Epoch 174/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.76it/s, vae_loss=0.00385, critic_loss=2.99e+3, actor_loss=-198]


2025-12-06 04:53.33 [info     ] BCQ_20251206031126: epoch=174 step=174000 epoch=174 metrics={'time_sample_batch': 0.002118213415145874, 'time_algorithm_update': 0.025899704933166505, 'vae_loss': 0.0038579003991326315, 'critic_loss': 2991.670480834961, 'actor_loss': -197.31587971496583, 'time_step': 0.028268106698989867, 'td_error': 1367.0885065705925, 'value_scale': 141.838072991556, 'discounted_advantage': -107.49401000465643, 'initial_state': 231.47433471679688, 'diff_eval': 207.54081794697015} step=174000
2025-12-06 04:53.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_174000.d3


Epoch 175/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.68it/s, vae_loss=0.00392, critic_loss=2.93e+3, actor_loss=-175]


2025-12-06 04:54.07 [info     ] BCQ_20251206031126: epoch=175 step=175000 epoch=175 metrics={'time_sample_batch': 0.0021185910701751708, 'time_algorithm_update': 0.025952486753463747, 'vae_loss': 0.0039056054159300403, 'critic_loss': 2926.996067504883, 'actor_loss': -175.09681449890138, 'time_step': 0.02832047724723816, 'td_error': 946.6592385101208, 'value_scale': 117.74517580272266, 'discounted_advantage': -106.75057831145779, 'initial_state': 199.7410888671875, 'diff_eval': 175.050992445212} step=175000
2025-12-06 04:54.07 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_175000.d3


Epoch 176/200: 100%|██████████| 1000/1000 [00:29<00:00, 33.59it/s, vae_loss=0.00395, critic_loss=2.62e+3, actor_loss=-154]


2025-12-06 04:54.43 [info     ] BCQ_20251206031126: epoch=176 step=176000 epoch=176 metrics={'time_sample_batch': 0.002090444326400757, 'time_algorithm_update': 0.026855175018310546, 'vae_loss': 0.0039452268171589824, 'critic_loss': 2612.5620308837892, 'actor_loss': -153.45332346343994, 'time_step': 0.029221681356430055, 'td_error': 990.0428142702915, 'value_scale': 117.77874588526704, 'discounted_advantage': -141.86171779229602, 'initial_state': 209.24195861816406, 'diff_eval': 277.83248728835736} step=176000
2025-12-06 04:54.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_176000.d3


Epoch 177/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.72it/s, vae_loss=0.00378, critic_loss=2.26e+3, actor_loss=-132]


2025-12-06 04:55.17 [info     ] BCQ_20251206031126: epoch=177 step=177000 epoch=177 metrics={'time_sample_batch': 0.0021010982990264894, 'time_algorithm_update': 0.025877201557159425, 'vae_loss': 0.0037839279529871418, 'critic_loss': 2260.7404665527342, 'actor_loss': -132.02088873672486, 'time_step': 0.02824294137954712, 'td_error': 1719.4131026554705, 'value_scale': 90.59207367143857, 'discounted_advantage': -12.874375680630175, 'initial_state': 183.9810333251953, 'diff_eval': 164.65175516361757} step=177000
2025-12-06 04:55.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_177000.d3


Epoch 178/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.43it/s, vae_loss=0.00388, critic_loss=2.21e+3, actor_loss=-115]


2025-12-06 04:55.53 [info     ] BCQ_20251206031126: epoch=178 step=178000 epoch=178 metrics={'time_sample_batch': 0.002116638898849487, 'time_algorithm_update': 0.026123262643814087, 'vae_loss': 0.003880085627199151, 'critic_loss': 2216.173403137207, 'actor_loss': -114.74714120674133, 'time_step': 0.02849780797958374, 'td_error': 831.9226681475122, 'value_scale': 75.84225970614087, 'discounted_advantage': -66.28681753544353, 'initial_state': 153.1183624267578, 'diff_eval': 177.94279687771305} step=178000
2025-12-06 04:55.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_178000.d3


Epoch 179/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.38it/s, vae_loss=0.00386, critic_loss=2.05e+3, actor_loss=-97.4]


2025-12-06 04:56.28 [info     ] BCQ_20251206031126: epoch=179 step=179000 epoch=179 metrics={'time_sample_batch': 0.002131645917892456, 'time_algorithm_update': 0.02616310524940491, 'vae_loss': 0.003864526966935955, 'critic_loss': 2049.007518371582, 'actor_loss': -97.1834390296936, 'time_step': 0.02856275677680969, 'td_error': 1061.5411468477344, 'value_scale': 47.38735329750098, 'discounted_advantage': -12.65384184087233, 'initial_state': 107.83972930908203, 'diff_eval': 247.89144983343888} step=179000
2025-12-06 04:56.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_179000.d3


Epoch 180/200: 100%|██████████| 1000/1000 [00:30<00:00, 33.07it/s, vae_loss=0.00384, critic_loss=2.01e+3, actor_loss=-81.7]


2025-12-06 04:57.04 [info     ] BCQ_20251206031126: epoch=180 step=180000 epoch=180 metrics={'time_sample_batch': 0.0024900028705596923, 'time_algorithm_update': 0.02696653962135315, 'vae_loss': 0.0038439290415262805, 'critic_loss': 2004.3242844238282, 'actor_loss': -81.46292126464844, 'time_step': 0.029702407121658326, 'td_error': 812.2975524626081, 'value_scale': 45.17606016306545, 'discounted_advantage': -39.67213774083515, 'initial_state': 107.12129211425781, 'diff_eval': 245.97922779873997} step=180000
2025-12-06 04:57.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_180000.d3


Epoch 181/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.78it/s, vae_loss=0.00377, critic_loss=1.93e+3, actor_loss=-68.7]


2025-12-06 04:57.38 [info     ] BCQ_20251206031126: epoch=181 step=181000 epoch=181 metrics={'time_sample_batch': 0.002101719856262207, 'time_algorithm_update': 0.025901064157485963, 'vae_loss': 0.0037760712835006416, 'critic_loss': 1924.4135913696289, 'actor_loss': -68.68142317867279, 'time_step': 0.02825115895271301, 'td_error': 871.3031287108564, 'value_scale': 30.35945670220498, 'discounted_advantage': -52.4102549097698, 'initial_state': 71.01670837402344, 'diff_eval': 206.38485535011537} step=181000
2025-12-06 04:57.38 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_181000.d3


Epoch 182/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.86it/s, vae_loss=0.00379, critic_loss=1.98e+3, actor_loss=-59.1]


2025-12-06 04:58.12 [info     ] BCQ_20251206031126: epoch=182 step=182000 epoch=182 metrics={'time_sample_batch': 0.0020794155597686765, 'time_algorithm_update': 0.025833972215652465, 'vae_loss': 0.0037867932559456675, 'critic_loss': 1980.8723882446288, 'actor_loss': -59.23741407775879, 'time_step': 0.028176549911499024, 'td_error': 964.9223895010024, 'value_scale': 22.371568288557288, 'discounted_advantage': -26.67221393496359, 'initial_state': 85.48831176757812, 'diff_eval': 167.15951961860122} step=182000
2025-12-06 04:58.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_182000.d3


Epoch 183/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.81it/s, vae_loss=0.00379, critic_loss=1.97e+3, actor_loss=-50.9]


2025-12-06 04:58.46 [info     ] BCQ_20251206031126: epoch=183 step=183000 epoch=183 metrics={'time_sample_batch': 0.00212628698348999, 'time_algorithm_update': 0.025811998844146727, 'vae_loss': 0.0037982261151773855, 'critic_loss': 1972.624682800293, 'actor_loss': -50.88383674442768, 'time_step': 0.028200424194335938, 'td_error': 907.9140058787183, 'value_scale': 15.705494966710804, 'discounted_advantage': -26.43743251288251, 'initial_state': 56.85679244995117, 'diff_eval': 206.77240497267726} step=183000
2025-12-06 04:58.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_183000.d3


Epoch 184/200: 100%|██████████| 1000/1000 [00:29<00:00, 33.88it/s, vae_loss=0.00371, critic_loss=1.88e+3, actor_loss=-44.5]


2025-12-06 04:59.21 [info     ] BCQ_20251206031126: epoch=184 step=184000 epoch=184 metrics={'time_sample_batch': 0.002185321569442749, 'time_algorithm_update': 0.026489518880844116, 'vae_loss': 0.003705050842836499, 'critic_loss': 1878.6724844970704, 'actor_loss': -44.43389296920598, 'time_step': 0.02894853377342224, 'td_error': 766.4992256903877, 'value_scale': 12.330390018881717, 'discounted_advantage': -36.676432851377115, 'initial_state': 50.069278717041016, 'diff_eval': 206.50059178031543} step=184000
2025-12-06 04:59.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_184000.d3


Epoch 185/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.59it/s, vae_loss=0.00382, critic_loss=2.04e+3, actor_loss=-40.4]


2025-12-06 04:59.56 [info     ] BCQ_20251206031126: epoch=185 step=185000 epoch=185 metrics={'time_sample_batch': 0.0021542026996612547, 'time_algorithm_update': 0.02597997736930847, 'vae_loss': 0.0038127883119741454, 'critic_loss': 2030.340451599121, 'actor_loss': -40.29743745550513, 'time_step': 0.028407915353775025, 'td_error': 877.7204428914995, 'value_scale': 3.900264898494623, 'discounted_advantage': -39.77826686682903, 'initial_state': 32.73158264160156, 'diff_eval': 179.20792522973886} step=185000
2025-12-06 04:59.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_185000.d3


Epoch 186/200: 100%|██████████| 1000/1000 [00:28<00:00, 35.34it/s, vae_loss=0.00374, critic_loss=1.98e+3, actor_loss=-36.6]


2025-12-06 05:00.30 [info     ] BCQ_20251206031126: epoch=186 step=186000 epoch=186 metrics={'time_sample_batch': 0.0020806989669799806, 'time_algorithm_update': 0.02546027112007141, 'vae_loss': 0.003739677542936988, 'critic_loss': 1976.6398898315429, 'actor_loss': -36.599465260788335, 'time_step': 0.027789834260940553, 'td_error': 927.572071437279, 'value_scale': 1.396748210113786, 'discounted_advantage': -3.9336787934385553, 'initial_state': 31.79336166381836, 'diff_eval': 173.97333071054123} step=186000
2025-12-06 05:00.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_186000.d3


Epoch 187/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.81it/s, vae_loss=0.00381, critic_loss=1.99e+3, actor_loss=-34.2]


2025-12-06 05:01.04 [info     ] BCQ_20251206031126: epoch=187 step=187000 epoch=187 metrics={'time_sample_batch': 0.002102681875228882, 'time_algorithm_update': 0.025894446849822998, 'vae_loss': 0.0038142601221334187, 'critic_loss': 1995.1700441284179, 'actor_loss': -34.21941160724312, 'time_step': 0.028250156164169312, 'td_error': 1000.3548532281159, 'value_scale': 0.8717658453919962, 'discounted_advantage': 6.791599682528007, 'initial_state': 39.24992752075195, 'diff_eval': 224.64908393199087} step=187000
2025-12-06 05:01.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_187000.d3


Epoch 188/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.32it/s, vae_loss=0.0036, critic_loss=1.97e+3, actor_loss=-33.5]


2025-12-06 05:01.39 [info     ] BCQ_20251206031126: epoch=188 step=188000 epoch=188 metrics={'time_sample_batch': 0.002184237003326416, 'time_algorithm_update': 0.026150661706924438, 'vae_loss': 0.003603172830073163, 'critic_loss': 1968.932531890869, 'actor_loss': -33.51677964213491, 'time_step': 0.028601688623428344, 'td_error': 948.8135984465628, 'value_scale': 6.881925752119466, 'discounted_advantage': -43.637088941919046, 'initial_state': 30.817550659179688, 'diff_eval': 155.60358415292873} step=188000
2025-12-06 05:01.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_188000.d3


Epoch 189/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.56it/s, vae_loss=0.00374, critic_loss=2.04e+3, actor_loss=-37] 


2025-12-06 05:02.13 [info     ] BCQ_20251206031126: epoch=189 step=189000 epoch=189 metrics={'time_sample_batch': 0.002146007537841797, 'time_algorithm_update': 0.02599951958656311, 'vae_loss': 0.0037344742497662084, 'critic_loss': 2038.7294772644043, 'actor_loss': -37.00801998722553, 'time_step': 0.028397271156311035, 'td_error': 1001.3401008374475, 'value_scale': 4.65976539693854, 'discounted_advantage': -22.01437648221929, 'initial_state': 29.998193740844727, 'diff_eval': 141.0764157081524} step=189000
2025-12-06 05:02.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_189000.d3


Epoch 190/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.67it/s, vae_loss=0.00382, critic_loss=2.05e+3, actor_loss=-40.6]


2025-12-06 05:02.47 [info     ] BCQ_20251206031126: epoch=190 step=190000 epoch=190 metrics={'time_sample_batch': 0.0021228957176208495, 'time_algorithm_update': 0.025945820331573485, 'vae_loss': 0.0038171991953859104, 'critic_loss': 2050.096218414307, 'actor_loss': -40.63214217364788, 'time_step': 0.028334786415100097, 'td_error': 914.4237967655635, 'value_scale': 12.08534869334324, 'discounted_advantage': -40.703511557122084, 'initial_state': 36.90108871459961, 'diff_eval': 199.6113757987345} step=190000
2025-12-06 05:02.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_190000.d3


Epoch 191/200: 100%|██████████| 1000/1000 [00:28<00:00, 35.21it/s, vae_loss=0.00371, critic_loss=2.03e+3, actor_loss=-44.1]


2025-12-06 05:03.21 [info     ] BCQ_20251206031126: epoch=191 step=191000 epoch=191 metrics={'time_sample_batch': 0.0021329355239868165, 'time_algorithm_update': 0.025511489152908327, 'vae_loss': 0.003701945455977693, 'critic_loss': 2027.9418066101075, 'actor_loss': -44.13370444537699, 'time_step': 0.02789902663230896, 'td_error': 847.8480122549038, 'value_scale': 4.088964000306805, 'discounted_advantage': -20.411946319194964, 'initial_state': 17.394285202026367, 'diff_eval': 232.39330627064368} step=191000
2025-12-06 05:03.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_191000.d3


Epoch 192/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.07it/s, vae_loss=0.00386, critic_loss=2.23e+3, actor_loss=-51.2]


2025-12-06 05:03.56 [info     ] BCQ_20251206031126: epoch=192 step=192000 epoch=192 metrics={'time_sample_batch': 0.0021836934089660646, 'time_algorithm_update': 0.02636365842819214, 'vae_loss': 0.003857395622995682, 'critic_loss': 2229.976277709961, 'actor_loss': -51.30271807893366, 'time_step': 0.02882778000831604, 'td_error': 1216.9415885446592, 'value_scale': 25.240656920770387, 'discounted_advantage': -70.83223544912768, 'initial_state': 46.07341003417969, 'diff_eval': 227.92431423925422} step=192000
2025-12-06 05:03.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_192000.d3


Epoch 193/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.24it/s, vae_loss=0.0037, critic_loss=2.17e+3, actor_loss=-57.5]


2025-12-06 05:04.31 [info     ] BCQ_20251206031126: epoch=193 step=193000 epoch=193 metrics={'time_sample_batch': 0.0021837387084960936, 'time_algorithm_update': 0.026257651567459107, 'vae_loss': 0.003704840329824947, 'critic_loss': 2168.7806317749023, 'actor_loss': -57.50932972359657, 'time_step': 0.028696038484573364, 'td_error': 972.6184250301633, 'value_scale': 27.851174415544314, 'discounted_advantage': -63.032725848886265, 'initial_state': 52.21916961669922, 'diff_eval': 174.87888830955947} step=193000
2025-12-06 05:04.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_193000.d3


Epoch 194/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.12it/s, vae_loss=0.00375, critic_loss=2.31e+3, actor_loss=-65.8]


2025-12-06 05:05.06 [info     ] BCQ_20251206031126: epoch=194 step=194000 epoch=194 metrics={'time_sample_batch': 0.0020949366092681884, 'time_algorithm_update': 0.026439348936080933, 'vae_loss': 0.0037510710963979364, 'critic_loss': 2309.157845275879, 'actor_loss': -65.74671744060517, 'time_step': 0.02878161644935608, 'td_error': 1325.9591029535754, 'value_scale': 25.74254288485044, 'discounted_advantage': -10.693350184038216, 'initial_state': 50.57218933105469, 'diff_eval': 175.42334504759862} step=194000
2025-12-06 05:05.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_194000.d3


Epoch 195/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.53it/s, vae_loss=0.00365, critic_loss=2.55e+3, actor_loss=-75.2]


2025-12-06 05:05.40 [info     ] BCQ_20251206031126: epoch=195 step=195000 epoch=195 metrics={'time_sample_batch': 0.0021501410007476805, 'time_algorithm_update': 0.026025031566619873, 'vae_loss': 0.003656120998901315, 'critic_loss': 2551.065546813965, 'actor_loss': -75.21281162929535, 'time_step': 0.02843636226654053, 'td_error': 1786.4717949920303, 'value_scale': 36.21442162913129, 'discounted_advantage': -89.75473242289917, 'initial_state': 33.798606872558594, 'diff_eval': 218.59739227670582} step=195000
2025-12-06 05:05.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_195000.d3


Epoch 196/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.65it/s, vae_loss=0.00361, critic_loss=2.8e+3, actor_loss=-84.5]


2025-12-06 05:06.15 [info     ] BCQ_20251206031126: epoch=196 step=196000 epoch=196 metrics={'time_sample_batch': 0.002125361680984497, 'time_algorithm_update': 0.025932764291763305, 'vae_loss': 0.003606792672770098, 'critic_loss': 2801.7089959106447, 'actor_loss': -84.70195713424683, 'time_step': 0.028320424795150758, 'td_error': 1387.6899801470754, 'value_scale': 50.827302393199815, 'discounted_advantage': -104.17850420892883, 'initial_state': 82.70911407470703, 'diff_eval': 203.84583435329475} step=196000
2025-12-06 05:06.15 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_196000.d3


Epoch 197/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.92it/s, vae_loss=0.00356, critic_loss=2.96e+3, actor_loss=-96.8]


2025-12-06 05:06.49 [info     ] BCQ_20251206031126: epoch=197 step=197000 epoch=197 metrics={'time_sample_batch': 0.002109447956085205, 'time_algorithm_update': 0.025749694108963012, 'vae_loss': 0.0035648005245020615, 'critic_loss': 2976.303825256348, 'actor_loss': -96.70895530700683, 'time_step': 0.028113914012908935, 'td_error': 1152.4163101356744, 'value_scale': 43.79648833666293, 'discounted_advantage': -58.919344537512686, 'initial_state': 55.26789855957031, 'diff_eval': 144.1297831249545} step=197000
2025-12-06 05:06.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_197000.d3


Epoch 198/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.38it/s, vae_loss=0.00367, critic_loss=3.3e+3, actor_loss=-110]


2025-12-06 05:07.23 [info     ] BCQ_20251206031126: epoch=198 step=198000 epoch=198 metrics={'time_sample_batch': 0.002197055816650391, 'time_algorithm_update': 0.026129478216171266, 'vae_loss': 0.003666403402108699, 'critic_loss': 3290.58876159668, 'actor_loss': -109.60223540306092, 'time_step': 0.02856649374961853, 'td_error': 2617.1610216445442, 'value_scale': 73.4755715521463, 'discounted_advantage': -150.32319521532946, 'initial_state': 96.47386932373047, 'diff_eval': 209.20132551073814} step=198000
2025-12-06 05:07.23 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_198000.d3


Epoch 199/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.01it/s, vae_loss=0.00365, critic_loss=3.5e+3, actor_loss=-122]


2025-12-06 05:07.58 [info     ] BCQ_20251206031126: epoch=199 step=199000 epoch=199 metrics={'time_sample_batch': 0.00212298583984375, 'time_algorithm_update': 0.026541423320770263, 'vae_loss': 0.0036459523683879526, 'critic_loss': 3497.2582352905274, 'actor_loss': -122.40583721160888, 'time_step': 0.028904587984085083, 'td_error': 1486.6411797450824, 'value_scale': 78.08011783781865, 'discounted_advantage': -121.60137172478771, 'initial_state': 95.39607238769531, 'diff_eval': 192.25019652652452} step=199000
2025-12-06 05:07.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_199000.d3


Epoch 200/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.64it/s, vae_loss=0.00354, critic_loss=3.73e+3, actor_loss=-135]


2025-12-06 05:08.33 [info     ] BCQ_20251206031126: epoch=200 step=200000 epoch=200 metrics={'time_sample_batch': 0.002128641128540039, 'time_algorithm_update': 0.02596136236190796, 'vae_loss': 0.0035324740735813973, 'critic_loss': 3727.242287109375, 'actor_loss': -134.79820697784425, 'time_step': 0.028357470750808716, 'td_error': 1804.227412509953, 'value_scale': 85.49470993423053, 'discounted_advantage': -119.64621705999302, 'initial_state': 111.2763442993164, 'diff_eval': 213.95101965430138} step=200000
2025-12-06 05:08.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\BCQ_20251206031126\model_200000.d3
Training model:  BEAR
2025-12-06 05:08.33 [info     ] dataset info                   dataset_info=DatasetInfo(observation_signature=Signature(dtype=[dtype('float64')], shape=[(7,)]), action_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.CONTINUOUS: 1>, action_size=1

Epoch 1/200: 100%|██████████| 1000/1000 [00:29<00:00, 33.74it/s, imitator_loss=0.0493, critic_loss=3.56, actor_loss=-0.0171] 


2025-12-06 05:09.08 [info     ] BEAR_20251206050833: epoch=1 step=1000 epoch=1 metrics={'time_sample_batch': 0.004884363174438477, 'time_algorithm_update': 0.02394502377510071, 'imitator_loss': 0.049241085728630424, 'critic_loss': 3.5790620698332787, 'actor_loss': -0.017101427763322137, 'time_step': 0.02909015989303589, 'td_error': 2.1372756514511924, 'value_scale': -7.621922487784911, 'discounted_advantage': 8.097443624603674, 'initial_state': -8.481053352355957, 'diff_eval': 3530.392468576826} step=1000
2025-12-06 05:09.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_1000.d3


Epoch 2/200: 100%|██████████| 1000/1000 [00:29<00:00, 33.64it/s, imitator_loss=0.0374, critic_loss=7.81, actor_loss=-0.0239]


2025-12-06 05:09.43 [info     ] BEAR_20251206050833: epoch=2 step=2000 epoch=2 metrics={'time_sample_batch': 0.004882925033569336, 'time_algorithm_update': 0.02404536247253418, 'imitator_loss': 0.03736178345233202, 'critic_loss': 7.819709290027618, 'actor_loss': -0.023862500118499157, 'time_step': 0.029188241720199584, 'td_error': 3.644442344105321, 'value_scale': -16.31579644442204, 'discounted_advantage': 18.050150185460748, 'initial_state': -17.039220809936523, 'diff_eval': 3432.5163608230787} step=2000
2025-12-06 05:09.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_2000.d3


Epoch 3/200: 100%|██████████| 1000/1000 [00:29<00:00, 33.57it/s, imitator_loss=0.0349, critic_loss=9.96, actor_loss=-0.0232]


2025-12-06 05:10.18 [info     ] BEAR_20251206050833: epoch=3 step=3000 epoch=3 metrics={'time_sample_batch': 0.004956900596618652, 'time_algorithm_update': 0.02402416706085205, 'imitator_loss': 0.034927403243258597, 'critic_loss': 9.973013386249542, 'actor_loss': -0.023150875865947454, 'time_step': 0.02924462866783142, 'td_error': 4.856225491060223, 'value_scale': -25.218034137524384, 'discounted_advantage': 24.8566477796604, 'initial_state': -25.306869506835938, 'diff_eval': 3837.197550333374} step=3000
2025-12-06 05:10.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_3000.d3


Epoch 4/200: 100%|██████████| 1000/1000 [00:29<00:00, 33.48it/s, imitator_loss=0.0313, critic_loss=11.2, actor_loss=-0.021]


2025-12-06 05:10.53 [info     ] BEAR_20251206050833: epoch=4 step=4000 epoch=4 metrics={'time_sample_batch': 0.004991362571716309, 'time_algorithm_update': 0.02403992509841919, 'imitator_loss': 0.031290298717096446, 'critic_loss': 11.205827246665955, 'actor_loss': -0.020998292145784945, 'time_step': 0.029314578533172608, 'td_error': 6.139949641082849, 'value_scale': -34.20020719146089, 'discounted_advantage': 39.38083897758083, 'initial_state': -33.0169792175293, 'diff_eval': 3427.1019235430795} step=4000
2025-12-06 05:10.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_4000.d3


Epoch 5/200: 100%|██████████| 1000/1000 [00:29<00:00, 33.89it/s, imitator_loss=0.0286, critic_loss=13.7, actor_loss=-0.02] 


2025-12-06 05:11.28 [info     ] BEAR_20251206050833: epoch=5 step=5000 epoch=5 metrics={'time_sample_batch': 0.0048985078334808346, 'time_algorithm_update': 0.023845746994018556, 'imitator_loss': 0.028604463825002313, 'critic_loss': 13.713037971973419, 'actor_loss': -0.019978326107608156, 'time_step': 0.028996408462524412, 'td_error': 7.465406298261023, 'value_scale': -43.34859707889093, 'discounted_advantage': 43.54234697600721, 'initial_state': -39.82851028442383, 'diff_eval': 3433.747935241433} step=5000
2025-12-06 05:11.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_5000.d3


Epoch 6/200: 100%|██████████| 1000/1000 [00:29<00:00, 33.90it/s, imitator_loss=0.0266, critic_loss=16.6, actor_loss=-0.0196]


2025-12-06 05:12.03 [info     ] BEAR_20251206050833: epoch=6 step=6000 epoch=6 metrics={'time_sample_batch': 0.004915302991867066, 'time_algorithm_update': 0.02381570601463318, 'imitator_loss': 0.026568820729851724, 'critic_loss': 16.593149887084962, 'actor_loss': -0.019596194883808492, 'time_step': 0.02898432755470276, 'td_error': 8.15710655714042, 'value_scale': -53.47148148791688, 'discounted_advantage': 56.045101175029465, 'initial_state': -46.57639694213867, 'diff_eval': 3385.0196325036322} step=6000
2025-12-06 05:12.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_6000.d3


Epoch 7/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.38it/s, imitator_loss=0.0248, critic_loss=20.3, actor_loss=-0.0199]


2025-12-06 05:12.38 [info     ] BEAR_20251206050833: epoch=7 step=7000 epoch=7 metrics={'time_sample_batch': 0.004797783613204956, 'time_algorithm_update': 0.023517566442489625, 'imitator_loss': 0.02477665210608393, 'critic_loss': 20.29634353733063, 'actor_loss': -0.01986678119577118, 'time_step': 0.028566970109939575, 'td_error': 9.126173396477032, 'value_scale': -64.70679181779221, 'discounted_advantage': 70.44673709670975, 'initial_state': -54.512596130371094, 'diff_eval': 3548.7872858810756} step=7000
2025-12-06 05:12.38 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_7000.d3


Epoch 8/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.17it/s, imitator_loss=0.0235, critic_loss=24.2, actor_loss=-0.0193]


2025-12-06 05:13.12 [info     ] BEAR_20251206050833: epoch=8 step=8000 epoch=8 metrics={'time_sample_batch': 0.004856873989105224, 'time_algorithm_update': 0.02365665054321289, 'imitator_loss': 0.023512756936252117, 'critic_loss': 24.222009742736816, 'actor_loss': -0.019188502067583612, 'time_step': 0.028766528606414796, 'td_error': 11.030228886081792, 'value_scale': -76.77854522135911, 'discounted_advantage': 81.53577276921555, 'initial_state': -64.55012512207031, 'diff_eval': 3220.1949390525883} step=8000
2025-12-06 05:13.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_8000.d3


Epoch 9/200: 100%|██████████| 1000/1000 [00:29<00:00, 33.47it/s, imitator_loss=0.0224, critic_loss=30, actor_loss=-0.0193] 


2025-12-06 05:13.47 [info     ] BEAR_20251206050833: epoch=9 step=9000 epoch=9 metrics={'time_sample_batch': 0.004819861173629761, 'time_algorithm_update': 0.02428261399269104, 'imitator_loss': 0.022425590596161782, 'critic_loss': 30.140848929405212, 'actor_loss': -0.01933970744162798, 'time_step': 0.0293688063621521, 'td_error': 11.935043221988808, 'value_scale': -89.39187183683896, 'discounted_advantage': 95.42161849404526, 'initial_state': -77.52950286865234, 'diff_eval': 3401.0638918402233} step=9000
2025-12-06 05:13.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_9000.d3


Epoch 10/200: 100%|██████████| 1000/1000 [00:30<00:00, 32.85it/s, imitator_loss=0.0218, critic_loss=36.5, actor_loss=-0.0185]


2025-12-06 05:14.23 [info     ] BEAR_20251206050833: epoch=10 step=10000 epoch=10 metrics={'time_sample_batch': 0.0048517887592315675, 'time_algorithm_update': 0.0240908203125, 'imitator_loss': 0.021728135000914334, 'critic_loss': 36.51898360443115, 'actor_loss': -0.018433537496253847, 'time_step': 0.029207950830459593, 'td_error': 14.044427650557585, 'value_scale': -101.69825128777529, 'discounted_advantage': 100.60252728386489, 'initial_state': -90.6045150756836, 'diff_eval': 2946.69219356965} step=10000
2025-12-06 05:14.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_10000.d3


Epoch 11/200: 100%|██████████| 1000/1000 [00:29<00:00, 33.48it/s, imitator_loss=0.0207, critic_loss=42.7, actor_loss=-0.0185]


2025-12-06 05:14.59 [info     ] BEAR_20251206050833: epoch=11 step=11000 epoch=11 metrics={'time_sample_batch': 0.004990256786346435, 'time_algorithm_update': 0.024045224905014036, 'imitator_loss': 0.020706373220309614, 'critic_loss': 42.74425434112549, 'actor_loss': -0.018434608393348755, 'time_step': 0.02930733251571655, 'td_error': 14.294551950916281, 'value_scale': -111.0620045881759, 'discounted_advantage': 108.97254385701432, 'initial_state': -101.93707275390625, 'diff_eval': 2927.278495566805} step=11000
2025-12-06 05:14.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_11000.d3


Epoch 12/200: 100%|██████████| 1000/1000 [00:29<00:00, 33.35it/s, imitator_loss=0.02, critic_loss=49.6, actor_loss=-0.0179] 


2025-12-06 05:15.35 [info     ] BEAR_20251206050833: epoch=12 step=12000 epoch=12 metrics={'time_sample_batch': 0.0050395805835723876, 'time_algorithm_update': 0.023970357179641723, 'imitator_loss': 0.019985121534205973, 'critic_loss': 49.72662216949463, 'actor_loss': -0.017946842279285193, 'time_step': 0.02927844548225403, 'td_error': 16.647780443434723, 'value_scale': -118.92605734231186, 'discounted_advantage': 110.89595872064143, 'initial_state': -113.67420196533203, 'diff_eval': 2866.339084792156} step=12000
2025-12-06 05:15.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_12000.d3


Epoch 13/200: 100%|██████████| 1000/1000 [00:30<00:00, 32.54it/s, imitator_loss=0.0196, critic_loss=56.4, actor_loss=-0.0177]


2025-12-06 05:16.11 [info     ] BEAR_20251206050833: epoch=13 step=13000 epoch=13 metrics={'time_sample_batch': 0.004888472318649292, 'time_algorithm_update': 0.025028804540634154, 'imitator_loss': 0.019602676041424273, 'critic_loss': 56.44402969932556, 'actor_loss': -0.017689249414019288, 'time_step': 0.030180366039276124, 'td_error': 20.022551765294796, 'value_scale': -123.60100816021621, 'discounted_advantage': 113.86200169912969, 'initial_state': -122.56465148925781, 'diff_eval': 2766.1364737590666} step=13000
2025-12-06 05:16.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_13000.d3


Epoch 14/200: 100%|██████████| 1000/1000 [00:31<00:00, 32.15it/s, imitator_loss=0.019, critic_loss=65.6, actor_loss=-0.0174]


2025-12-06 05:16.48 [info     ] BEAR_20251206050833: epoch=14 step=14000 epoch=14 metrics={'time_sample_batch': 0.00517444109916687, 'time_algorithm_update': 0.02516672682762146, 'imitator_loss': 0.019016785386018455, 'critic_loss': 65.65294143295289, 'actor_loss': -0.01735717587871477, 'time_step': 0.03060101819038391, 'td_error': 22.71711354134029, 'value_scale': -125.58091096326471, 'discounted_advantage': 112.98634200579897, 'initial_state': -126.64429473876953, 'diff_eval': 2731.110723379739} step=14000
2025-12-06 05:16.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_14000.d3


Epoch 15/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.42it/s, imitator_loss=0.0181, critic_loss=74, actor_loss=-0.0172] 


2025-12-06 05:17.23 [info     ] BEAR_20251206050833: epoch=15 step=15000 epoch=15 metrics={'time_sample_batch': 0.004848053932189941, 'time_algorithm_update': 0.023441352128982545, 'imitator_loss': 0.018140312228351833, 'critic_loss': 74.09721785736085, 'actor_loss': -0.017230061284266414, 'time_step': 0.02854287362098694, 'td_error': 27.797988465631967, 'value_scale': -125.20206252687235, 'discounted_advantage': 116.27041820574594, 'initial_state': -130.01239013671875, 'diff_eval': 2527.994888288773} step=15000
2025-12-06 05:17.23 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_15000.d3


Epoch 16/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.34it/s, imitator_loss=0.0179, critic_loss=84.1, actor_loss=-0.0168]


2025-12-06 05:17.57 [info     ] BEAR_20251206050833: epoch=16 step=16000 epoch=16 metrics={'time_sample_batch': 0.0048315727710723875, 'time_algorithm_update': 0.023515181541442873, 'imitator_loss': 0.017876440488733353, 'critic_loss': 84.1264246788025, 'actor_loss': -0.016808577145449817, 'time_step': 0.028607577323913575, 'td_error': 30.65768485387213, 'value_scale': -122.79948562548526, 'discounted_advantage': 105.88034739611184, 'initial_state': -128.82789611816406, 'diff_eval': 2645.13395130323} step=16000
2025-12-06 05:17.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_16000.d3


Epoch 17/200: 100%|██████████| 1000/1000 [00:29<00:00, 33.79it/s, imitator_loss=0.0176, critic_loss=94.5, actor_loss=-0.0171]


2025-12-06 05:18.32 [info     ] BEAR_20251206050833: epoch=17 step=17000 epoch=17 metrics={'time_sample_batch': 0.004963818550109863, 'time_algorithm_update': 0.023756124258041383, 'imitator_loss': 0.01757137943431735, 'critic_loss': 94.61763607025146, 'actor_loss': -0.017082837726920843, 'time_step': 0.02901262831687927, 'td_error': 34.1405459277351, 'value_scale': -121.3669388048407, 'discounted_advantage': 105.02889990242625, 'initial_state': -129.454833984375, 'diff_eval': 2465.677347893541} step=17000
2025-12-06 05:18.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_17000.d3


Epoch 18/200: 100%|██████████| 1000/1000 [00:29<00:00, 33.34it/s, imitator_loss=0.0171, critic_loss=107, actor_loss=-0.0169]


2025-12-06 05:19.08 [info     ] BEAR_20251206050833: epoch=18 step=18000 epoch=18 metrics={'time_sample_batch': 0.0049945485591888425, 'time_algorithm_update': 0.02418870687484741, 'imitator_loss': 0.01703754055313766, 'critic_loss': 106.89425352478027, 'actor_loss': -0.016926727618090807, 'time_step': 0.029452436208724977, 'td_error': 39.96640750055708, 'value_scale': -120.31833313211911, 'discounted_advantage': 106.3809121567017, 'initial_state': -132.5695343017578, 'diff_eval': 2498.403408453621} step=18000
2025-12-06 05:19.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_18000.d3


Epoch 19/200: 100%|██████████| 1000/1000 [00:29<00:00, 33.56it/s, imitator_loss=0.0167, critic_loss=122, actor_loss=-0.0165]


2025-12-06 05:19.43 [info     ] BEAR_20251206050833: epoch=19 step=19000 epoch=19 metrics={'time_sample_batch': 0.005044183015823364, 'time_algorithm_update': 0.023968991279602052, 'imitator_loss': 0.0167219504378736, 'critic_loss': 122.18222093963622, 'actor_loss': -0.01654729671264067, 'time_step': 0.02927674651145935, 'td_error': 41.22195628789966, 'value_scale': -116.72991459987189, 'discounted_advantage': 101.4393164958853, 'initial_state': -128.38693237304688, 'diff_eval': 2847.9717211695006} step=19000
2025-12-06 05:19.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_19000.d3


Epoch 20/200: 100%|██████████| 1000/1000 [00:29<00:00, 33.57it/s, imitator_loss=0.0163, critic_loss=132, actor_loss=-0.0167]


2025-12-06 05:20.18 [info     ] BEAR_20251206050833: epoch=20 step=20000 epoch=20 metrics={'time_sample_batch': 0.0049441344738006595, 'time_algorithm_update': 0.024056753873825075, 'imitator_loss': 0.01629588375147432, 'critic_loss': 132.10493489456178, 'actor_loss': -0.016665693532675503, 'time_step': 0.029261243104934692, 'td_error': 46.093115238157715, 'value_scale': -114.22662639276929, 'discounted_advantage': 98.9388903239315, 'initial_state': -127.7568588256836, 'diff_eval': 2454.2543033457246} step=20000
2025-12-06 05:20.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_20000.d3


Epoch 21/200: 100%|██████████| 1000/1000 [00:29<00:00, 33.94it/s, imitator_loss=0.0157, critic_loss=150, actor_loss=-0.0168]


2025-12-06 05:20.53 [info     ] BEAR_20251206050833: epoch=21 step=21000 epoch=21 metrics={'time_sample_batch': 0.004960317373275757, 'time_algorithm_update': 0.023694909811019897, 'imitator_loss': 0.015699732021428645, 'critic_loss': 149.96607180404663, 'actor_loss': -0.016842459549661726, 'time_step': 0.02892136597633362, 'td_error': 52.595227908027255, 'value_scale': -111.14766500192036, 'discounted_advantage': 101.03168948508167, 'initial_state': -124.5092544555664, 'diff_eval': 2407.966388449573} step=21000
2025-12-06 05:20.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_21000.d3


Epoch 22/200: 100%|██████████| 1000/1000 [00:29<00:00, 33.82it/s, imitator_loss=0.0154, critic_loss=166, actor_loss=-0.0164]


2025-12-06 05:21.28 [info     ] BEAR_20251206050833: epoch=22 step=22000 epoch=22 metrics={'time_sample_batch': 0.004926982879638672, 'time_algorithm_update': 0.023856544494628908, 'imitator_loss': 0.015359981916844845, 'critic_loss': 166.28656887054444, 'actor_loss': -0.01638594695739448, 'time_step': 0.029040067195892333, 'td_error': 45.61596738239643, 'value_scale': -108.5966622587871, 'discounted_advantage': 92.75709357231906, 'initial_state': -117.4571304321289, 'diff_eval': 2560.2303684947055} step=22000
2025-12-06 05:21.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_22000.d3


Epoch 23/200: 100%|██████████| 1000/1000 [00:29<00:00, 33.89it/s, imitator_loss=0.0152, critic_loss=185, actor_loss=-0.0165]


2025-12-06 05:22.03 [info     ] BEAR_20251206050833: epoch=23 step=23000 epoch=23 metrics={'time_sample_batch': 0.004886160135269165, 'time_algorithm_update': 0.02383772897720337, 'imitator_loss': 0.01515756537951529, 'critic_loss': 184.51219342041014, 'actor_loss': -0.016491911674849688, 'time_step': 0.0289892418384552, 'td_error': 49.57937141752309, 'value_scale': -105.38808010926991, 'discounted_advantage': 86.19125833314087, 'initial_state': -115.76950073242188, 'diff_eval': 2575.786349207575} step=23000
2025-12-06 05:22.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_23000.d3


Epoch 24/200: 100%|██████████| 1000/1000 [00:29<00:00, 33.43it/s, imitator_loss=0.0147, critic_loss=202, actor_loss=-0.0162]


2025-12-06 05:22.38 [info     ] BEAR_20251206050833: epoch=24 step=24000 epoch=24 metrics={'time_sample_batch': 0.005026912927627564, 'time_algorithm_update': 0.02410894775390625, 'imitator_loss': 0.014652195140719413, 'critic_loss': 201.4373186340332, 'actor_loss': -0.016184617274207995, 'time_step': 0.029397197723388672, 'td_error': 54.52634196996349, 'value_scale': -103.5851228781488, 'discounted_advantage': 85.06164748029911, 'initial_state': -115.39771270751953, 'diff_eval': 2484.7438175211933} step=24000
2025-12-06 05:22.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_24000.d3


Epoch 25/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.59it/s, imitator_loss=0.0143, critic_loss=228, actor_loss=-0.016]


2025-12-06 05:23.13 [info     ] BEAR_20251206050833: epoch=25 step=25000 epoch=25 metrics={'time_sample_batch': 0.0048341224193572995, 'time_algorithm_update': 0.02333818507194519, 'imitator_loss': 0.014298795425333083, 'critic_loss': 227.92269353485108, 'actor_loss': -0.01604621647205204, 'time_step': 0.028423157930374147, 'td_error': 64.63337331366938, 'value_scale': -96.85998953188155, 'discounted_advantage': 75.06007373151614, 'initial_state': -112.01066589355469, 'diff_eval': 2454.4064615986094} step=25000
2025-12-06 05:23.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_25000.d3


Epoch 26/200: 100%|██████████| 1000/1000 [00:29<00:00, 33.43it/s, imitator_loss=0.0139, critic_loss=271, actor_loss=-0.0161]


2025-12-06 05:23.48 [info     ] BEAR_20251206050833: epoch=26 step=26000 epoch=26 metrics={'time_sample_batch': 0.005067759990692138, 'time_algorithm_update': 0.024070226907730103, 'imitator_loss': 0.013895610335282981, 'critic_loss': 271.100818939209, 'actor_loss': -0.016112258065957577, 'time_step': 0.029394423246383666, 'td_error': 78.4268285446644, 'value_scale': -91.37246640714795, 'discounted_advantage': 61.14116251086808, 'initial_state': -109.22736358642578, 'diff_eval': 2458.476412329994} step=26000
2025-12-06 05:23.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_26000.d3


Epoch 27/200: 100%|██████████| 1000/1000 [00:31<00:00, 31.96it/s, imitator_loss=0.0136, critic_loss=352, actor_loss=-0.0157]


2025-12-06 05:24.25 [info     ] BEAR_20251206050833: epoch=27 step=27000 epoch=27 metrics={'time_sample_batch': 0.005122809648513794, 'time_algorithm_update': 0.025369943618774413, 'imitator_loss': 0.013584516003727913, 'critic_loss': 353.5124485321045, 'actor_loss': -0.015683429279830307, 'time_step': 0.03075863313674927, 'td_error': 112.82753394444539, 'value_scale': -79.16919140396554, 'discounted_advantage': 39.299356832433574, 'initial_state': -105.3735580444336, 'diff_eval': 2558.64404556491} step=27000
2025-12-06 05:24.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_27000.d3


Epoch 28/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.64it/s, imitator_loss=0.0132, critic_loss=507, actor_loss=-0.0157]


2025-12-06 05:24.59 [info     ] BEAR_20251206050833: epoch=28 step=28000 epoch=28 metrics={'time_sample_batch': 0.004772279500961304, 'time_algorithm_update': 0.023252978801727296, 'imitator_loss': 0.013229549356736243, 'critic_loss': 508.67952337646483, 'actor_loss': -0.015755905918311327, 'time_step': 0.0282956862449646, 'td_error': 168.7598838419441, 'value_scale': -62.70298278540972, 'discounted_advantage': 14.21964578529259, 'initial_state': -93.57742309570312, 'diff_eval': 2539.7807603126953} step=28000
2025-12-06 05:24.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_28000.d3


Epoch 29/200: 100%|██████████| 1000/1000 [00:30<00:00, 32.96it/s, imitator_loss=0.0129, critic_loss=795, actor_loss=-0.0155]


2025-12-06 05:25.35 [info     ] BEAR_20251206050833: epoch=29 step=29000 epoch=29 metrics={'time_sample_batch': 0.005252368450164795, 'time_algorithm_update': 0.02429944396018982, 'imitator_loss': 0.012948988061863929, 'critic_loss': 796.542310760498, 'actor_loss': -0.015506559547502548, 'time_step': 0.029825263023376465, 'td_error': 304.44741432648306, 'value_scale': -35.61633204578829, 'discounted_advantage': -29.216888623733475, 'initial_state': -72.71582794189453, 'diff_eval': 2556.951370238784} step=29000
2025-12-06 05:25.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_29000.d3


Epoch 30/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.15it/s, imitator_loss=0.0127, critic_loss=1.31e+3, actor_loss=-0.0152]


2025-12-06 05:26.10 [info     ] BEAR_20251206050833: epoch=30 step=30000 epoch=30 metrics={'time_sample_batch': 0.004873071908950806, 'time_algorithm_update': 0.023633864879608155, 'imitator_loss': 0.012683376346249133, 'critic_loss': 1316.90245904541, 'actor_loss': -0.015266005862038582, 'time_step': 0.028766478300094604, 'td_error': 589.8014319513809, 'value_scale': -2.364556867351376, 'discounted_advantage': -86.72524244179299, 'initial_state': -58.48160171508789, 'diff_eval': 2588.1806656550684} step=30000
2025-12-06 05:26.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_30000.d3


Epoch 31/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.38it/s, imitator_loss=0.0122, critic_loss=2.11e+3, actor_loss=-0.0153]


2025-12-06 05:26.44 [info     ] BEAR_20251206050833: epoch=31 step=31000 epoch=31 metrics={'time_sample_batch': 0.004894400358200073, 'time_algorithm_update': 0.023435508728027344, 'imitator_loss': 0.012189026185777038, 'critic_loss': 2111.4518637695314, 'actor_loss': -0.015275426070555114, 'time_step': 0.028580075025558472, 'td_error': 1040.2957348188183, 'value_scale': 49.62546120687682, 'discounted_advantage': -163.3488988986735, 'initial_state': -24.736108779907227, 'diff_eval': 2239.2443512761192} step=31000
2025-12-06 05:26.44 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_31000.d3


Epoch 32/200: 100%|██████████| 1000/1000 [00:30<00:00, 33.33it/s, imitator_loss=0.0121, critic_loss=3.07e+3, actor_loss=-0.015]


2025-12-06 05:27.20 [info     ] BEAR_20251206050833: epoch=32 step=32000 epoch=32 metrics={'time_sample_batch': 0.004985416889190674, 'time_algorithm_update': 0.02423510456085205, 'imitator_loss': 0.012090575971174985, 'critic_loss': 3072.4309208984373, 'actor_loss': -0.015037639037705958, 'time_step': 0.029498260021209716, 'td_error': 1574.690277857494, 'value_scale': 105.52846779192782, 'discounted_advantage': -226.05025391031324, 'initial_state': 9.956888198852539, 'diff_eval': 2219.7835607708776} step=32000
2025-12-06 05:27.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_32000.d3


Epoch 33/200: 100%|██████████| 1000/1000 [00:29<00:00, 33.54it/s, imitator_loss=0.0116, critic_loss=3.89e+3, actor_loss=-0.015]


2025-12-06 05:27.55 [info     ] BEAR_20251206050833: epoch=33 step=33000 epoch=33 metrics={'time_sample_batch': 0.0050405747890472415, 'time_algorithm_update': 0.02398906397819519, 'imitator_loss': 0.011625521210487932, 'critic_loss': 3890.5544259033204, 'actor_loss': -0.01499486189405434, 'time_step': 0.029294363498687744, 'td_error': 2020.3171396860735, 'value_scale': 164.75434343968982, 'discounted_advantage': -276.486357975576, 'initial_state': 52.8390998840332, 'diff_eval': 2192.8526548113014} step=33000
2025-12-06 05:27.55 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_33000.d3


Epoch 34/200: 100%|██████████| 1000/1000 [00:28<00:00, 34.57it/s, imitator_loss=0.0116, critic_loss=4.26e+3, actor_loss=-0.0146]


2025-12-06 05:28.29 [info     ] BEAR_20251206050833: epoch=34 step=34000 epoch=34 metrics={'time_sample_batch': 0.004868352174758911, 'time_algorithm_update': 0.02328469157218933, 'imitator_loss': 0.011584409064147622, 'critic_loss': 4261.108809814453, 'actor_loss': -0.014642130393534899, 'time_step': 0.02840552043914795, 'td_error': 2049.96254646204, 'value_scale': 223.5439801168162, 'discounted_advantage': -313.0128946797917, 'initial_state': 117.77386474609375, 'diff_eval': 2175.4663922117606} step=34000
2025-12-06 05:28.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_34000.d3


Epoch 35/200: 100%|██████████| 1000/1000 [00:30<00:00, 33.22it/s, imitator_loss=0.0113, critic_loss=3.82e+3, actor_loss=-0.0148]


2025-12-06 05:29.05 [info     ] BEAR_20251206050833: epoch=35 step=35000 epoch=35 metrics={'time_sample_batch': 0.005104512453079224, 'time_algorithm_update': 0.02412361145019531, 'imitator_loss': 0.011258070309180766, 'critic_loss': 3817.750281494141, 'actor_loss': -0.014788934137672186, 'time_step': 0.029525952339172365, 'td_error': 1748.3287184446444, 'value_scale': 266.9565353169725, 'discounted_advantage': -336.0161840193924, 'initial_state': 174.80621337890625, 'diff_eval': 2060.0751906178793} step=35000
2025-12-06 05:29.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_35000.d3


Epoch 36/200: 100%|██████████| 1000/1000 [00:29<00:00, 33.45it/s, imitator_loss=0.0109, critic_loss=3.01e+3, actor_loss=-0.0147]


2025-12-06 05:29.40 [info     ] BEAR_20251206050833: epoch=36 step=36000 epoch=36 metrics={'time_sample_batch': 0.004989131927490235, 'time_algorithm_update': 0.024116751432418822, 'imitator_loss': 0.01090317143406719, 'critic_loss': 3003.803467529297, 'actor_loss': -0.01471709455922246, 'time_step': 0.02936930584907532, 'td_error': 1298.5281964062412, 'value_scale': 294.1628281336718, 'discounted_advantage': -350.14672647120744, 'initial_state': 216.85060119628906, 'diff_eval': 2420.5419909814605} step=36000
2025-12-06 05:29.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_36000.d3


Epoch 37/200: 100%|██████████| 1000/1000 [00:29<00:00, 33.66it/s, imitator_loss=0.0107, critic_loss=2.14e+3, actor_loss=-0.0145]


2025-12-06 05:30.15 [info     ] BEAR_20251206050833: epoch=37 step=37000 epoch=37 metrics={'time_sample_batch': 0.0049572036266326905, 'time_algorithm_update': 0.023967079401016236, 'imitator_loss': 0.010741283984389156, 'critic_loss': 2131.85646673584, 'actor_loss': -0.014471250954869902, 'time_step': 0.029188682556152343, 'td_error': 853.906912271097, 'value_scale': 309.63039419936814, 'discounted_advantage': -344.12089272867183, 'initial_state': 245.78640747070312, 'diff_eval': 2153.7966929556674} step=37000
2025-12-06 05:30.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_37000.d3


Epoch 38/200: 100%|██████████| 1000/1000 [00:29<00:00, 33.58it/s, imitator_loss=0.0104, critic_loss=1.37e+3, actor_loss=-0.0143]


2025-12-06 05:30.51 [info     ] BEAR_20251206050833: epoch=38 step=38000 epoch=38 metrics={'time_sample_batch': 0.004981644868850708, 'time_algorithm_update': 0.023994621515274046, 'imitator_loss': 0.010448979198001324, 'critic_loss': 1370.100609802246, 'actor_loss': -0.014290337800281123, 'time_step': 0.029241860151290894, 'td_error': 504.35320994177175, 'value_scale': 314.61172278788746, 'discounted_advantage': -340.6969298888558, 'initial_state': 258.2830810546875, 'diff_eval': 2206.632597812568} step=38000
2025-12-06 05:30.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_38000.d3


Epoch 39/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.26it/s, imitator_loss=0.0102, critic_loss=796, actor_loss=-0.0143]    


2025-12-06 05:31.25 [info     ] BEAR_20251206050833: epoch=39 step=39000 epoch=39 metrics={'time_sample_batch': 0.004868222713470459, 'time_algorithm_update': 0.023531443357467652, 'imitator_loss': 0.010226843634154648, 'critic_loss': 794.4915007019043, 'actor_loss': -0.014327038779854775, 'time_step': 0.028657782554626466, 'td_error': 271.5353412029215, 'value_scale': 316.7607604136311, 'discounted_advantage': -327.0026469243004, 'initial_state': 270.9819641113281, 'diff_eval': 2330.6100748839085} step=39000
2025-12-06 05:31.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_39000.d3


Epoch 40/200: 100%|██████████| 1000/1000 [00:29<00:00, 34.03it/s, imitator_loss=0.0101, critic_loss=432, actor_loss=-0.0142]


2025-12-06 05:32.00 [info     ] BEAR_20251206050833: epoch=40 step=40000 epoch=40 metrics={'time_sample_batch': 0.004899611473083496, 'time_algorithm_update': 0.023712726354599, 'imitator_loss': 0.010115630913991482, 'critic_loss': 430.95165380859373, 'actor_loss': -0.014180313422810287, 'time_step': 0.02886715841293335, 'td_error': 143.22533061014508, 'value_scale': 314.17727107953743, 'discounted_advantage': -315.2414197584422, 'initial_state': 281.2456970214844, 'diff_eval': 2350.608554936025} step=40000
2025-12-06 05:32.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_40000.d3


Epoch 41/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.41it/s, imitator_loss=0.00977, critic_loss=2.22e+3, actor_loss=-479, temp=1.05, temp_loss=-2.08, mmd_loss=0.338, alpha=1.05]


2025-12-06 05:32.48 [info     ] BEAR_20251206050833: epoch=41 step=41000 epoch=41 metrics={'time_sample_batch': 0.005155761241912842, 'time_algorithm_update': 0.03655906701087951, 'imitator_loss': 0.009776718865614384, 'critic_loss': 2240.547519744873, 'actor_loss': -480.3437773132324, 'temp': 1.049646134018898, 'temp_loss': -2.0789659951720387, 'mmd_loss': 0.33780340950097887, 'alpha': 1.0536496858596802, 'time_step': 0.042028372526168825, 'td_error': 6013.56991187728, 'value_scale': 581.1333743252878, 'discounted_advantage': -1014.7463002530686, 'initial_state': 440.6792907714844, 'diff_eval': 113334.37912405547} step=41000
2025-12-06 05:32.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_41000.d3


Epoch 42/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.42it/s, imitator_loss=0.00979, critic_loss=8.34e+3, actor_loss=-754, temp=1.17, temp_loss=-2.66, mmd_loss=0.389, alpha=1.17]


2025-12-06 05:33.36 [info     ] BEAR_20251206050833: epoch=42 step=42000 epoch=42 metrics={'time_sample_batch': 0.005203855276107788, 'time_algorithm_update': 0.036469383478164676, 'imitator_loss': 0.009792587213218212, 'critic_loss': 8376.146787353515, 'actor_loss': -755.5164854736328, 'temp': 1.1688177318572999, 'temp_loss': -2.6661616816520692, 'mmd_loss': 0.3894350126683712, 'alpha': 1.170954822421074, 'time_step': 0.041980026960372924, 'td_error': 14555.7855110776, 'value_scale': 904.4685191356726, 'discounted_advantage': -1478.4463420062214, 'initial_state': 685.4669799804688, 'diff_eval': 113384.63691822701} step=42000
2025-12-06 05:33.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_42000.d3


Epoch 43/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.02it/s, imitator_loss=0.00941, critic_loss=2.54e+4, actor_loss=-1.15e+3, temp=1.31, temp_loss=-3.51, mmd_loss=0.449, alpha=1.31]


2025-12-06 05:34.23 [info     ] BEAR_20251206050833: epoch=43 step=43000 epoch=43 metrics={'time_sample_batch': 0.004993280410766601, 'time_algorithm_update': 0.035653083562850955, 'imitator_loss': 0.009406407356727868, 'critic_loss': 25587.452813476564, 'actor_loss': -1148.920401550293, 'temp': 1.3139101781845093, 'temp_loss': -3.5136730713844297, 'mmd_loss': 0.4495000588297844, 'alpha': 1.3061673629283905, 'time_step': 0.04094799518585205, 'td_error': 37747.85709301164, 'value_scale': 1387.4313110453884, 'discounted_advantage': -2207.8085032617314, 'initial_state': 1108.8629150390625, 'diff_eval': 113412.7494697283} step=43000
2025-12-06 05:34.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_43000.d3


Epoch 44/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.77it/s, imitator_loss=0.00935, critic_loss=6.84e+4, actor_loss=-1.71e+3, temp=1.48, temp_loss=-4.53, mmd_loss=0.519, alpha=1.46]


2025-12-06 05:35.12 [info     ] BEAR_20251206050833: epoch=44 step=44000 epoch=44 metrics={'time_sample_batch': 0.005031246900558472, 'time_algorithm_update': 0.0360544445514679, 'imitator_loss': 0.009381615240126848, 'critic_loss': 68560.14114257813, 'actor_loss': -1709.895968383789, 'temp': 1.482895003437996, 'temp_loss': -4.53228851556778, 'mmd_loss': 0.5191514904797078, 'alpha': 1.4604184185266496, 'time_step': 0.0413921263217926, 'td_error': 87615.13064920533, 'value_scale': 2053.694563194972, 'discounted_advantage': -3185.5712792075465, 'initial_state': 1721.7294921875, 'diff_eval': 113434.38852345674} step=44000
2025-12-06 05:35.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_44000.d3


Epoch 45/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.98it/s, imitator_loss=0.00913, critic_loss=1.66e+5, actor_loss=-2.46e+3, temp=1.67, temp_loss=-5.52, mmd_loss=0.592, alpha=1.63]


2025-12-06 05:35.59 [info     ] BEAR_20251206050833: epoch=45 step=45000 epoch=45 metrics={'time_sample_batch': 0.0048982431888580326, 'time_algorithm_update': 0.03580500531196594, 'imitator_loss': 0.009133611724246293, 'critic_loss': 166882.7323671875, 'actor_loss': -2468.868549560547, 'temp': 1.6712040361166, 'temp_loss': -5.527962088108063, 'mmd_loss': 0.5921636081933975, 'alpha': 1.6337075148820877, 'time_step': 0.041006714344024656, 'td_error': 187606.17290599787, 'value_scale': 2945.605114408431, 'discounted_advantage': -4374.142965428611, 'initial_state': 2566.12353515625, 'diff_eval': 113439.62101354223} step=45000
2025-12-06 05:36.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_45000.d3


Epoch 46/200: 100%|██████████| 1000/1000 [00:43<00:00, 22.99it/s, imitator_loss=0.00899, critic_loss=3.68e+5, actor_loss=-3.48e+3, temp=1.88, temp_loss=-6.62, mmd_loss=0.673, alpha=1.83]


2025-12-06 05:36.48 [info     ] BEAR_20251206050833: epoch=46 step=46000 epoch=46 metrics={'time_sample_batch': 0.005125329256057739, 'time_algorithm_update': 0.03731098747253418, 'imitator_loss': 0.009001366464886815, 'critic_loss': 368723.395953125, 'actor_loss': -3486.572103027344, 'temp': 1.8789830930233002, 'temp_loss': -6.620657638549805, 'mmd_loss': 0.6729620364308357, 'alpha': 1.8271443066596984, 'time_step': 0.042751174211502076, 'td_error': 399049.6429673584, 'value_scale': 4129.135993797641, 'discounted_advantage': -6614.1732772786645, 'initial_state': 3726.9912109375, 'diff_eval': 113443.8025318539} step=46000
2025-12-06 05:36.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_46000.d3


Epoch 47/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.31it/s, imitator_loss=0.00874, critic_loss=7.86e+5, actor_loss=-4.88e+3, temp=2.11, temp_loss=-7.89, mmd_loss=0.764, alpha=2.04]


2025-12-06 05:37.35 [info     ] BEAR_20251206050833: epoch=47 step=47000 epoch=47 metrics={'time_sample_batch': 0.0049373266696929935, 'time_algorithm_update': 0.035216028928756714, 'imitator_loss': 0.00876154607720673, 'critic_loss': 789032.2593125, 'actor_loss': -4885.194822265625, 'temp': 2.1110598611831666, 'temp_loss': -7.897945584774018, 'mmd_loss': 0.7648456140756607, 'alpha': 2.0435043561458586, 'time_step': 0.040449429035186765, 'td_error': 826605.0721559357, 'value_scale': 5733.13943847247, 'discounted_advantage': -9519.999408443762, 'initial_state': 5379.83251953125, 'diff_eval': 113448.37494014156} step=47000
2025-12-06 05:37.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_47000.d3


Epoch 48/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.82it/s, imitator_loss=0.00863, critic_loss=1.62e+6, actor_loss=-6.74e+3, temp=2.37, temp_loss=-9.34, mmd_loss=0.867, alpha=2.28]


2025-12-06 05:38.22 [info     ] BEAR_20251206050833: epoch=48 step=48000 epoch=48 metrics={'time_sample_batch': 0.005117687463760376, 'time_algorithm_update': 0.03586437082290649, 'imitator_loss': 0.008627826147712767, 'critic_loss': 1626677.8879375, 'actor_loss': -6746.812468261719, 'temp': 2.370495741844177, 'temp_loss': -9.343301301956176, 'mmd_loss': 0.8675908750295639, 'alpha': 2.2853577082157135, 'time_step': 0.04128298044204712, 'td_error': 1438411.8922288995, 'value_scale': 7821.787486943695, 'discounted_advantage': -11651.357442498123, 'initial_state': 7545.3408203125, 'diff_eval': 113456.09371778132} step=48000
2025-12-06 05:38.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_48000.d3


Epoch 49/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.47it/s, imitator_loss=0.00837, critic_loss=3.23e+6, actor_loss=-9.16e+3, temp=2.66, temp_loss=-10.9, mmd_loss=0.981, alpha=2.55]


2025-12-06 05:39.10 [info     ] BEAR_20251206050833: epoch=49 step=49000 epoch=49 metrics={'time_sample_batch': 0.005100365877151489, 'time_algorithm_update': 0.03646969437599182, 'imitator_loss': 0.0083719352632761, 'critic_loss': 3242024.011625, 'actor_loss': -9172.73014892578, 'temp': 2.6595031735897066, 'temp_loss': -10.930592202186585, 'mmd_loss': 0.9811577274799347, 'alpha': 2.555297756910324, 'time_step': 0.04187899136543274, 'td_error': 2725385.1760060517, 'value_scale': 10542.51217592532, 'discounted_advantage': -15337.992197594875, 'initial_state': 10341.8720703125, 'diff_eval': 113455.90920959115} step=49000
2025-12-06 05:39.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_49000.d3


Epoch 50/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.44it/s, imitator_loss=0.00837, critic_loss=6.26e+6, actor_loss=-1.23e+4, temp=2.98, temp_loss=-12.6, mmd_loss=1.11, alpha=2.85]


2025-12-06 05:39.56 [info     ] BEAR_20251206050833: epoch=50 step=50000 epoch=50 metrics={'time_sample_batch': 0.004923289775848389, 'time_algorithm_update': 0.03494840359687805, 'imitator_loss': 0.008361010279040784, 'critic_loss': 6276114.718, 'actor_loss': -12292.508473632812, 'temp': 2.9809274022579193, 'temp_loss': -12.628627669334412, 'mmd_loss': 1.1057971248626708, 'alpha': 2.856416619062424, 'time_step': 0.04017598557472229, 'td_error': 4976405.948945872, 'value_scale': 13932.567803970427, 'discounted_advantage': -20948.26010922958, 'initial_state': 13889.833984375, 'diff_eval': 113455.89433561006} step=50000
2025-12-06 05:39.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_50000.d3


Epoch 51/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.97it/s, imitator_loss=0.0082, critic_loss=1.13e+7, actor_loss=-1.61e+4, temp=3.34, temp_loss=-14.5, mmd_loss=1.24, alpha=3.19]


2025-12-06 05:40.44 [info     ] BEAR_20251206050833: epoch=51 step=51000 epoch=51 metrics={'time_sample_batch': 0.005015716075897216, 'time_algorithm_update': 0.03575186395645141, 'imitator_loss': 0.008193650818895549, 'critic_loss': 11323653.026, 'actor_loss': -16099.594456054687, 'temp': 3.338874258518219, 'temp_loss': -14.530452466011047, 'mmd_loss': 1.2454322472810746, 'alpha': 3.192455765724182, 'time_step': 0.0410534348487854, 'td_error': 8430563.072679518, 'value_scale': 18049.757960662457, 'discounted_advantage': -26304.501335139277, 'initial_state': 18221.59765625, 'diff_eval': 113462.57984293756} step=51000
2025-12-06 05:40.44 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_51000.d3


Epoch 52/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.05it/s, imitator_loss=0.00804, critic_loss=1.94e+7, actor_loss=-2.05e+4, temp=3.74, temp_loss=-16.6, mmd_loss=1.4, alpha=3.57]


2025-12-06 05:41.31 [info     ] BEAR_20251206050833: epoch=52 step=52000 epoch=52 metrics={'time_sample_batch': 0.0050053551197052, 'time_algorithm_update': 0.03561237025260925, 'imitator_loss': 0.008043388823512942, 'critic_loss': 19391675.291, 'actor_loss': -20561.977634765626, 'temp': 3.7379850392341614, 'temp_loss': -16.636370520591736, 'mmd_loss': 1.4007961430549623, 'alpha': 3.567619889497757, 'time_step': 0.04090600323677063, 'td_error': 13421201.086598706, 'value_scale': 22721.260886257074, 'discounted_advantage': -32491.109493549568, 'initial_state': 23143.544921875, 'diff_eval': 113460.18251926522} step=52000
2025-12-06 05:41.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_52000.d3


Epoch 53/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.41it/s, imitator_loss=0.00798, critic_loss=3.14e+7, actor_loss=-2.56e+4, temp=4.18, temp_loss=-18.9, mmd_loss=1.57, alpha=3.98]


2025-12-06 05:42.17 [info     ] BEAR_20251206050833: epoch=53 step=53000 epoch=53 metrics={'time_sample_batch': 0.004876069307327271, 'time_algorithm_update': 0.03510365247726441, 'imitator_loss': 0.00798414059355855, 'critic_loss': 31469988.064, 'actor_loss': -25657.81218359375, 'temp': 4.182555304050446, 'temp_loss': -18.91250173187256, 'mmd_loss': 1.5723082041740417, 'alpha': 3.986278102874756, 'time_step': 0.040265300750732425, 'td_error': 20953097.723941825, 'value_scale': 28120.184660355982, 'discounted_advantage': -39618.356973412156, 'initial_state': 28834.69921875, 'diff_eval': 113463.77687363092} step=53000
2025-12-06 05:42.17 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_53000.d3


Epoch 54/200: 100%|██████████| 1000/1000 [00:43<00:00, 23.14it/s, imitator_loss=0.00778, critic_loss=4.82e+7, actor_loss=-3.12e+4, temp=4.68, temp_loss=-21.4, mmd_loss=1.76, alpha=4.45]


2025-12-06 05:43.05 [info     ] BEAR_20251206050833: epoch=54 step=54000 epoch=54 metrics={'time_sample_batch': 0.005168803930282593, 'time_algorithm_update': 0.0369700608253479, 'imitator_loss': 0.007787323037628084, 'critic_loss': 48283806.356, 'actor_loss': -31248.991158203124, 'temp': 4.678198968887329, 'temp_loss': -21.46092656326294, 'mmd_loss': 1.7642582333087922, 'alpha': 4.453577557563782, 'time_step': 0.042463131904602054, 'td_error': 29908477.333871413, 'value_scale': 33850.134863854255, 'discounted_advantage': -45267.338095275874, 'initial_state': 34827.86328125, 'diff_eval': 113464.39479154474} step=54000
2025-12-06 05:43.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_54000.d3


Epoch 55/200: 100%|██████████| 1000/1000 [00:43<00:00, 23.20it/s, imitator_loss=0.00767, critic_loss=7.08e+7, actor_loss=-3.74e+4, temp=5.23, temp_loss=-24.3, mmd_loss=1.98, alpha=4.97]


2025-12-06 05:43.54 [info     ] BEAR_20251206050833: epoch=55 step=55000 epoch=55 metrics={'time_sample_batch': 0.005610498666763305, 'time_algorithm_update': 0.03645190763473511, 'imitator_loss': 0.0076577115962281825, 'critic_loss': 70912201.076, 'actor_loss': -37412.77343359375, 'temp': 5.2313277249336245, 'temp_loss': -24.319987884521485, 'mmd_loss': 1.9786906818151473, 'alpha': 4.975309544086456, 'time_step': 0.04238037657737732, 'td_error': 42529680.39297271, 'value_scale': 40392.051306776506, 'discounted_advantage': -51538.09383212263, 'initial_state': 41679.75390625, 'diff_eval': 113463.97124263128} step=55000
2025-12-06 05:43.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_55000.d3


Epoch 56/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.13it/s, imitator_loss=0.00756, critic_loss=1.01e+8, actor_loss=-4.41e+4, temp=5.85, temp_loss=-27.5, mmd_loss=2.22, alpha=5.56]


2025-12-06 05:44.41 [info     ] BEAR_20251206050833: epoch=56 step=56000 epoch=56 metrics={'time_sample_batch': 0.004970856428146362, 'time_algorithm_update': 0.03549794316291809, 'imitator_loss': 0.0075563852887135, 'critic_loss': 101305131.168, 'actor_loss': -44147.0024921875, 'temp': 5.848762715339661, 'temp_loss': -27.468260332107544, 'mmd_loss': 2.217377977848053, 'alpha': 5.55795196056366, 'time_step': 0.04077158856391907, 'td_error': 59102919.139173254, 'value_scale': 47259.59806226425, 'discounted_advantage': -62206.5890956948, 'initial_state': 48800.19140625, 'diff_eval': 113465.40833040095} step=56000
2025-12-06 05:44.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_56000.d3


Epoch 57/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.43it/s, imitator_loss=0.00747, critic_loss=1.4e+8, actor_loss=-5.14e+4, temp=6.53, temp_loss=-30.9, mmd_loss=2.48, alpha=6.2] 


2025-12-06 05:45.31 [info     ] BEAR_20251206050833: epoch=57 step=57000 epoch=57 metrics={'time_sample_batch': 0.004975716352462769, 'time_algorithm_update': 0.036721779108047484, 'imitator_loss': 0.007469587178435177, 'critic_loss': 140208338.688, 'actor_loss': -51407.19602734375, 'temp': 6.537055008888244, 'temp_loss': -30.87503981399536, 'mmd_loss': 2.479206559896469, 'alpha': 6.207989108085632, 'time_step': 0.041997928380966186, 'td_error': 81266519.18397725, 'value_scale': 54875.95621594719, 'discounted_advantage': -74209.40091528621, 'initial_state': 56638.38671875, 'diff_eval': 113464.8931775153} step=57000
2025-12-06 05:45.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_57000.d3


Epoch 58/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.00it/s, imitator_loss=0.00743, critic_loss=1.88e+8, actor_loss=-5.91e+4, temp=7.3, temp_loss=-34.6, mmd_loss=2.77, alpha=6.93]


2025-12-06 05:46.18 [info     ] BEAR_20251206050833: epoch=58 step=58000 epoch=58 metrics={'time_sample_batch': 0.005026867151260376, 'time_algorithm_update': 0.03560144805908203, 'imitator_loss': 0.007427176775876432, 'critic_loss': 188191758.296, 'actor_loss': -59098.29835546875, 'temp': 7.303705952167511, 'temp_loss': -34.62006895637512, 'mmd_loss': 2.7720065627098083, 'alpha': 6.933400691986084, 'time_step': 0.0409488263130188, 'td_error': 105065258.45489252, 'value_scale': 62594.528185247276, 'discounted_advantage': -82407.83960878015, 'initial_state': 64588.390625, 'diff_eval': 113465.26764279259} step=58000
2025-12-06 05:46.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_58000.d3


Epoch 59/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.42it/s, imitator_loss=0.00729, critic_loss=2.45e+8, actor_loss=-6.71e+4, temp=8.15, temp_loss=-38.7, mmd_loss=3.1, alpha=7.74]


2025-12-06 05:47.06 [info     ] BEAR_20251206050833: epoch=59 step=59000 epoch=59 metrics={'time_sample_batch': 0.005078777551651001, 'time_algorithm_update': 0.036626293182373044, 'imitator_loss': 0.007280634630704299, 'critic_loss': 245160292.0, 'actor_loss': -67161.02287890625, 'temp': 8.158537529945374, 'temp_loss': -38.70876349639892, 'mmd_loss': 3.097586054801941, 'alpha': 7.743212729454041, 'time_step': 0.04200842308998108, 'td_error': 134063889.72567107, 'value_scale': 70916.9523882806, 'discounted_advantage': -88857.83847942158, 'initial_state': 73005.8984375, 'diff_eval': 113466.10778739526} step=59000
2025-12-06 05:47.07 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_59000.d3


Epoch 60/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.26it/s, imitator_loss=0.00718, critic_loss=3.11e+8, actor_loss=-7.54e+4, temp=9.11, temp_loss=-43.1, mmd_loss=3.45, alpha=8.64]


2025-12-06 05:47.53 [info     ] BEAR_20251206050833: epoch=60 step=60000 epoch=60 metrics={'time_sample_batch': 0.0048676090240478516, 'time_algorithm_update': 0.035356910943984986, 'imitator_loss': 0.007176953820511698, 'critic_loss': 310950874.944, 'actor_loss': -75397.28584375, 'temp': 9.110303058624268, 'temp_loss': -43.07122491455078, 'mmd_loss': 3.455409917831421, 'alpha': 8.646806740760804, 'time_step': 0.04051577711105347, 'td_error': 168575289.4330187, 'value_scale': 79306.30501165654, 'discounted_advantage': -99725.22053664633, 'initial_state': 81384.9375, 'diff_eval': 113464.45569548562} step=60000
2025-12-06 05:47.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_60000.d3


Epoch 61/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.91it/s, imitator_loss=0.00711, critic_loss=3.87e+8, actor_loss=-8.38e+4, temp=10.2, temp_loss=-47.8, mmd_loss=3.85, alpha=9.65]


2025-12-06 05:48.40 [info     ] BEAR_20251206050833: epoch=61 step=61000 epoch=61 metrics={'time_sample_batch': 0.00510032320022583, 'time_algorithm_update': 0.03565373706817627, 'imitator_loss': 0.007111778041580692, 'critic_loss': 387328557.52, 'actor_loss': -83872.087671875, 'temp': 10.170214179992676, 'temp_loss': -47.86507109832764, 'mmd_loss': 3.8518884649276734, 'alpha': 9.65490237045288, 'time_step': 0.04107669138908386, 'td_error': 210026095.04006454, 'value_scale': 88217.13281904862, 'discounted_advantage': -112174.0929368391, 'initial_state': 90265.5, 'diff_eval': 113466.55214923306} step=61000
2025-12-06 05:48.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_61000.d3


Epoch 62/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.82it/s, imitator_loss=0.00706, critic_loss=4.8e+8, actor_loss=-9.26e+4, temp=11.3, temp_loss=-53.2, mmd_loss=4.29, alpha=10.8]


2025-12-06 05:49.28 [info     ] BEAR_20251206050833: epoch=62 step=62000 epoch=62 metrics={'time_sample_batch': 0.005010564327239991, 'time_algorithm_update': 0.03596878337860107, 'imitator_loss': 0.007053271100157871, 'critic_loss': 480850620.64, 'actor_loss': -92670.025140625, 'temp': 11.352147391319274, 'temp_loss': -53.21751530075073, 'mmd_loss': 4.296624343395234, 'alpha': 10.780149456977844, 'time_step': 0.04128472495079041, 'td_error': 256454287.69358447, 'value_scale': 97530.79594182209, 'discounted_advantage': -123175.96071982983, 'initial_state': 99433.7421875, 'diff_eval': 113464.91137062194} step=62000
2025-12-06 05:49.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_62000.d3


Epoch 63/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.81it/s, imitator_loss=0.00686, critic_loss=5.8e+8, actor_loss=-1.02e+5, temp=12.7, temp_loss=-59.2, mmd_loss=4.79, alpha=12]  


2025-12-06 05:50.15 [info     ] BEAR_20251206050833: epoch=63 step=63000 epoch=63 metrics={'time_sample_batch': 0.005046457290649414, 'time_algorithm_update': 0.03594446897506714, 'imitator_loss': 0.006856667907675728, 'critic_loss': 580118806.656, 'actor_loss': -102417.3513828125, 'temp': 12.672397803306579, 'temp_loss': -59.20311149597168, 'mmd_loss': 4.792577266693115, 'alpha': 12.036861327171327, 'time_step': 0.041293336629867554, 'td_error': 317204605.21554446, 'value_scale': 107786.77016646584, 'discounted_advantage': -138071.60586085563, 'initial_state': 109565.2421875, 'diff_eval': 113464.03510359247} step=63000
2025-12-06 05:50.15 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_63000.d3


Epoch 64/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.02it/s, imitator_loss=0.00677, critic_loss=7.1e+8, actor_loss=-1.13e+5, temp=14.1, temp_loss=-65.6, mmd_loss=5.34, alpha=13.4]


2025-12-06 05:51.02 [info     ] BEAR_20251206050833: epoch=64 step=64000 epoch=64 metrics={'time_sample_batch': 0.00498966383934021, 'time_algorithm_update': 0.035653207540512086, 'imitator_loss': 0.00675870646443218, 'critic_loss': 710845584.0, 'actor_loss': -112701.7754296875, 'temp': 14.1441504611969, 'temp_loss': -65.65755917739868, 'mmd_loss': 5.342831779956818, 'alpha': 13.43957482433319, 'time_step': 0.04094305324554443, 'td_error': 383682266.45459354, 'value_scale': 118733.10443406852, 'discounted_advantage': -148376.21164716498, 'initial_state': 120291.2265625, 'diff_eval': 113465.19931237475} step=64000
2025-12-06 05:51.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_64000.d3


Epoch 65/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.84it/s, imitator_loss=0.00675, critic_loss=8.61e+8, actor_loss=-1.24e+5, temp=15.8, temp_loss=-72.9, mmd_loss=5.95, alpha=15] 


2025-12-06 05:51.49 [info     ] BEAR_20251206050833: epoch=65 step=65000 epoch=65 metrics={'time_sample_batch': 0.0050426371097564695, 'time_algorithm_update': 0.035904821395874024, 'imitator_loss': 0.0067506043999455865, 'critic_loss': 861961455.648, 'actor_loss': -123987.5007578125, 'temp': 15.785884199142457, 'temp_loss': -72.91549217224122, 'mmd_loss': 5.955618988990784, 'alpha': 15.005671644210816, 'time_step': 0.04124294114112854, 'td_error': 475283520.5035821, 'value_scale': 130886.46969300084, 'discounted_advantage': -170370.25305593456, 'initial_state': 132365.78125, 'diff_eval': 113464.9568442307} step=65000
2025-12-06 05:51.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_65000.d3


Epoch 66/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.59it/s, imitator_loss=0.00667, critic_loss=1.05e+9, actor_loss=-1.36e+5, temp=17.6, temp_loss=-81, mmd_loss=6.64, alpha=16.7] 


2025-12-06 05:52.37 [info     ] BEAR_20251206050833: epoch=66 step=66000 epoch=66 metrics={'time_sample_batch': 0.005083128452301026, 'time_algorithm_update': 0.03627304840087891, 'imitator_loss': 0.0066659912778995935, 'critic_loss': 1050941955.264, 'actor_loss': -136128.63034375, 'temp': 17.619022846221924, 'temp_loss': -81.02733339691162, 'mmd_loss': 6.64049666595459, 'alpha': 16.754154666900636, 'time_step': 0.04166918969154358, 'td_error': 565298678.6984198, 'value_scale': 143317.34300345767, 'discounted_advantage': -180213.9661823697, 'initial_state': 144677.546875, 'diff_eval': 113461.79815256865} step=66000
2025-12-06 05:52.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_66000.d3


Epoch 67/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.60it/s, imitator_loss=0.00666, critic_loss=1.26e+9, actor_loss=-1.49e+5, temp=19.7, temp_loss=-89.9, mmd_loss=7.4, alpha=18.7]


2025-12-06 05:53.25 [info     ] BEAR_20251206050833: epoch=67 step=67000 epoch=67 metrics={'time_sample_batch': 0.005059844255447388, 'time_algorithm_update': 0.03627941656112671, 'imitator_loss': 0.0066600034984294326, 'critic_loss': 1259309432.96, 'actor_loss': -149166.666625, 'temp': 19.664312517166138, 'temp_loss': -89.90731128692627, 'mmd_loss': 7.399760255813598, 'alpha': 18.706096029281618, 'time_step': 0.04165184903144836, 'td_error': 678022556.9556724, 'value_scale': 157025.35951906958, 'discounted_advantage': -192278.01500627, 'initial_state': 158493.421875, 'diff_eval': 113463.42118239423} step=67000
2025-12-06 05:53.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_67000.d3


Epoch 68/200: 100%|██████████| 1000/1000 [00:44<00:00, 22.39it/s, imitator_loss=0.00661, critic_loss=1.53e+9, actor_loss=-1.63e+5, temp=21.9, temp_loss=-99.9, mmd_loss=8.25, alpha=20.9]


2025-12-06 05:54.16 [info     ] BEAR_20251206050833: epoch=68 step=68000 epoch=68 metrics={'time_sample_batch': 0.0053501749038696285, 'time_algorithm_update': 0.03823354244232178, 'imitator_loss': 0.006610165072837845, 'critic_loss': 1534392288.32, 'actor_loss': -163553.26190625, 'temp': 21.947934995651245, 'temp_loss': -99.91535692596436, 'mmd_loss': 8.251781918525696, 'alpha': 20.885533319473268, 'time_step': 0.0439015998840332, 'td_error': 822484823.2624031, 'value_scale': 172193.9568446144, 'discounted_advantage': -212964.40814851207, 'initial_state': 173503.484375, 'diff_eval': 113464.09280270163} step=68000
2025-12-06 05:54.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_68000.d3


Epoch 69/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.96it/s, imitator_loss=0.00656, critic_loss=1.86e+9, actor_loss=-1.79e+5, temp=24.5, temp_loss=-111, mmd_loss=9.2, alpha=23.3]


2025-12-06 05:55.03 [info     ] BEAR_20251206050833: epoch=69 step=69000 epoch=69 metrics={'time_sample_batch': 0.005058984279632569, 'time_algorithm_update': 0.035591835975646975, 'imitator_loss': 0.00655184972169809, 'critic_loss': 1857536743.552, 'actor_loss': -179085.85453125, 'temp': 24.49530550765991, 'temp_loss': -110.76285648345947, 'mmd_loss': 9.201196608543396, 'alpha': 23.31956786727905, 'time_step': 0.04097670125961304, 'td_error': 993508947.5282769, 'value_scale': 188552.87976739314, 'discounted_advantage': -236078.39909008262, 'initial_state': 190067.484375, 'diff_eval': 113461.1371846099} step=69000
2025-12-06 05:55.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_69000.d3


Epoch 70/200: 100%|██████████| 1000/1000 [00:43<00:00, 23.09it/s, imitator_loss=0.00654, critic_loss=2.19e+9, actor_loss=-1.95e+5, temp=27.3, temp_loss=-123, mmd_loss=10.2, alpha=26] 


2025-12-06 05:55.52 [info     ] BEAR_20251206050833: epoch=70 step=70000 epoch=70 metrics={'time_sample_batch': 0.0050090205669403075, 'time_algorithm_update': 0.03676325368881225, 'imitator_loss': 0.006540331149473787, 'critic_loss': 2196236999.936, 'actor_loss': -195261.64265625, 'temp': 27.335548906326295, 'temp_loss': -122.68260128784179, 'mmd_loss': 10.245092748641968, 'alpha': 26.035530281066894, 'time_step': 0.042570322036743165, 'td_error': 1180484885.411389, 'value_scale': 205098.83327876154, 'discounted_advantage': -258815.37529587845, 'initial_state': 206720.15625, 'diff_eval': 113463.62719840396} step=70000
2025-12-06 05:55.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_70000.d3


Epoch 71/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.77it/s, imitator_loss=0.00633, critic_loss=2.6e+9, actor_loss=-2.12e+5, temp=30.5, temp_loss=-136, mmd_loss=11.4, alpha=29.1]


2025-12-06 05:56.40 [info     ] BEAR_20251206050833: epoch=71 step=71000 epoch=71 metrics={'time_sample_batch': 0.0050548059940338135, 'time_algorithm_update': 0.03598338532447815, 'imitator_loss': 0.006331751669058576, 'critic_loss': 2604986309.76, 'actor_loss': -212039.157875, 'temp': 30.50151323890686, 'temp_loss': -135.72393808746338, 'mmd_loss': 11.412360455513001, 'alpha': 29.067080152511597, 'time_step': 0.041357984066009525, 'td_error': 1391363646.3354876, 'value_scale': 222420.84925738684, 'discounted_advantage': -275952.00099690753, 'initial_state': 223979.453125, 'diff_eval': 113462.9766112437} step=71000
2025-12-06 05:56.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_71000.d3


Epoch 72/200: 100%|██████████| 1000/1000 [00:43<00:00, 23.17it/s, imitator_loss=0.00632, critic_loss=3.09e+9, actor_loss=-2.29e+5, temp=34, temp_loss=-150, mmd_loss=12.7, alpha=32.4] 


2025-12-06 05:57.28 [info     ] BEAR_20251206050833: epoch=72 step=72000 epoch=72 metrics={'time_sample_batch': 0.005104962587356567, 'time_algorithm_update': 0.03702370357513428, 'imitator_loss': 0.006317891633836553, 'critic_loss': 3092559068.8, 'actor_loss': -229403.901109375, 'temp': 34.03045220947266, 'temp_loss': -149.9401875152588, 'mmd_loss': 12.707189915657043, 'alpha': 32.45102725982666, 'time_step': 0.0424422562122345, 'td_error': 1636103334.2638934, 'value_scale': 240173.26526482607, 'discounted_advantage': -306267.8150174962, 'initial_state': 241534.5625, 'diff_eval': 113460.569228819} step=72000
2025-12-06 05:57.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_72000.d3


Epoch 73/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.99it/s, imitator_loss=0.00622, critic_loss=3.58e+9, actor_loss=-2.47e+5, temp=37.9, temp_loss=-165, mmd_loss=14.1, alpha=36.2]


2025-12-06 05:58.15 [info     ] BEAR_20251206050833: epoch=73 step=73000 epoch=73 metrics={'time_sample_batch': 0.005036730766296387, 'time_algorithm_update': 0.035616855144500734, 'imitator_loss': 0.006222367259673774, 'critic_loss': 3579534974.208, 'actor_loss': -247270.33621875, 'temp': 37.96316004180908, 'temp_loss': -165.4402318572998, 'mmd_loss': 14.142344131469727, 'alpha': 36.22787893295288, 'time_step': 0.040967945575714114, 'td_error': 1882697199.0926023, 'value_scale': 258293.3005094824, 'discounted_advantage': -321945.3678192746, 'initial_state': 259314.40625, 'diff_eval': 113459.92056191278} step=73000
2025-12-06 05:58.15 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_73000.d3


Epoch 74/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.54it/s, imitator_loss=0.00622, critic_loss=4.14e+9, actor_loss=-2.66e+5, temp=42.3, temp_loss=-183, mmd_loss=15.7, alpha=40.4]


2025-12-06 05:59.03 [info     ] BEAR_20251206050833: epoch=74 step=74000 epoch=74 metrics={'time_sample_batch': 0.005071448802947998, 'time_algorithm_update': 0.036386372566223145, 'imitator_loss': 0.0062153034717775885, 'critic_loss': 4146751770.112, 'actor_loss': -265691.52159375, 'temp': 42.348592155456544, 'temp_loss': -182.7037859802246, 'mmd_loss': 15.73993350982666, 'alpha': 40.44313109588623, 'time_step': 0.04177203226089478, 'td_error': 2203658552.771023, 'value_scale': 278358.09365177073, 'discounted_advantage': -347188.2220958728, 'initial_state': 279197.625, 'diff_eval': 113459.7475394723} step=74000
2025-12-06 05:59.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_74000.d3


Epoch 75/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.06it/s, imitator_loss=0.00619, critic_loss=4.78e+9, actor_loss=-2.85e+5, temp=47.2, temp_loss=-201, mmd_loss=17.5, alpha=45.1]


2025-12-06 05:59.50 [info     ] BEAR_20251206050833: epoch=75 step=75000 epoch=75 metrics={'time_sample_batch': 0.0049546751976013185, 'time_algorithm_update': 0.03560110664367676, 'imitator_loss': 0.006191928715445101, 'critic_loss': 4783410915.072, 'actor_loss': -285170.51071875, 'temp': 47.23963341903686, 'temp_loss': -201.53443516540528, 'mmd_loss': 17.520516416549683, 'alpha': 45.14946838378906, 'time_step': 0.040864566326141355, 'td_error': 2545016690.9114733, 'value_scale': 298424.3923276404, 'discounted_advantage': -380365.7894648049, 'initial_state': 298907.28125, 'diff_eval': 113459.17232423257} step=75000
2025-12-06 05:59.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_75000.d3


Epoch 76/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.86it/s, imitator_loss=0.00606, critic_loss=5.54e+9, actor_loss=-3.06e+5, temp=52.7, temp_loss=-222, mmd_loss=19.5, alpha=50.4]


2025-12-06 06:00.37 [info     ] BEAR_20251206050833: epoch=76 step=76000 epoch=76 metrics={'time_sample_batch': 0.005081044197082519, 'time_algorithm_update': 0.03583622884750366, 'imitator_loss': 0.006060535907978192, 'critic_loss': 5546037516.8, 'actor_loss': -305723.4525, 'temp': 52.69508387756348, 'temp_loss': -222.4859375, 'mmd_loss': 19.487980091094972, 'alpha': 50.40162158203125, 'time_step': 0.041215567827224735, 'td_error': 2933094475.4478765, 'value_scale': 320001.6596028919, 'discounted_advantage': -402007.942923237, 'initial_state': 320015.40625, 'diff_eval': 113459.3551779121} step=76000
2025-12-06 06:00.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_76000.d3


Epoch 77/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.66it/s, imitator_loss=0.00616, critic_loss=6.36e+9, actor_loss=-3.28e+5, temp=58.7, temp_loss=-245, mmd_loss=21.7, alpha=56.2]


2025-12-06 06:01.25 [info     ] BEAR_20251206050833: epoch=77 step=77000 epoch=77 metrics={'time_sample_batch': 0.005123968124389648, 'time_algorithm_update': 0.03614246606826782, 'imitator_loss': 0.006163282179739326, 'critic_loss': 6362826946.048, 'actor_loss': -327629.3508125, 'temp': 58.77714336395264, 'temp_loss': -245.09573468017578, 'mmd_loss': 21.681602109909058, 'alpha': 56.26438678741455, 'time_step': 0.04157106733322143, 'td_error': 3375623650.622241, 'value_scale': 342852.76038610644, 'discounted_advantage': -426646.9744486479, 'initial_state': 342307.96875, 'diff_eval': 113458.98179126365} step=77000
2025-12-06 06:01.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_77000.d3


Epoch 78/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.73it/s, imitator_loss=0.00607, critic_loss=7.29e+9, actor_loss=-3.5e+5, temp=65.5, temp_loss=-270, mmd_loss=24.1, alpha=62.8]


2025-12-06 06:02.12 [info     ] BEAR_20251206050833: epoch=78 step=78000 epoch=78 metrics={'time_sample_batch': 0.005083953857421875, 'time_algorithm_update': 0.0360231556892395, 'imitator_loss': 0.006067421711981297, 'critic_loss': 7302781532.672, 'actor_loss': -350174.74153125, 'temp': 65.55650256347656, 'temp_loss': -270.10991105651857, 'mmd_loss': 24.13261648750305, 'alpha': 62.80979787445068, 'time_step': 0.04141986632347107, 'td_error': 3844243676.53961, 'value_scale': 366036.5211651299, 'discounted_advantage': -448301.0898930612, 'initial_state': 364855.59375, 'diff_eval': 113452.6044015045} step=78000
2025-12-06 06:02.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_78000.d3


Epoch 79/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.74it/s, imitator_loss=0.00606, critic_loss=8.35e+9, actor_loss=-3.74e+5, temp=73.1, temp_loss=-297, mmd_loss=26.8, alpha=70.1]


2025-12-06 06:03.00 [info     ] BEAR_20251206050833: epoch=79 step=79000 epoch=79 metrics={'time_sample_batch': 0.005058337450027466, 'time_algorithm_update': 0.036048078060150146, 'imitator_loss': 0.006059823357267305, 'critic_loss': 8352639482.88, 'actor_loss': -373740.6419375, 'temp': 73.11559555053711, 'temp_loss': -297.4624797668457, 'mmd_loss': 26.8310031375885, 'alpha': 70.11621241760254, 'time_step': 0.041401002168655394, 'td_error': 4368253064.636458, 'value_scale': 390383.69954945514, 'discounted_advantage': -474833.1815538444, 'initial_state': 388491.84375, 'diff_eval': 113455.2337604916} step=79000
2025-12-06 06:03.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_79000.d3


Epoch 80/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.72it/s, imitator_loss=0.00587, critic_loss=9.52e+9, actor_loss=-3.98e+5, temp=81.5, temp_loss=-327, mmd_loss=29.8, alpha=78.2]


2025-12-06 06:03.48 [info     ] BEAR_20251206050833: epoch=80 step=80000 epoch=80 metrics={'time_sample_batch': 0.0050981385707855225, 'time_algorithm_update': 0.03601761031150818, 'imitator_loss': 0.005866691577015444, 'critic_loss': 9526946302.464, 'actor_loss': -398533.94975, 'temp': 81.54146006011963, 'temp_loss': -327.3595897521973, 'mmd_loss': 29.845956827163697, 'alpha': 78.27123128509521, 'time_step': 0.04142950797080994, 'td_error': 5012028873.570836, 'value_scale': 416642.3064752724, 'discounted_advantage': -518098.5314513404, 'initial_state': 414330.40625, 'diff_eval': 113456.19207692046} step=80000
2025-12-06 06:03.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_80000.d3


Epoch 81/200: 100%|██████████| 1000/1000 [00:43<00:00, 22.87it/s, imitator_loss=0.00583, critic_loss=1.09e+10, actor_loss=-4.24e+5, temp=90.9, temp_loss=-360, mmd_loss=33.2, alpha=87.3]


2025-12-06 06:04.37 [info     ] BEAR_20251206050833: epoch=81 step=81000 epoch=81 metrics={'time_sample_batch': 0.005899841547012329, 'time_algorithm_update': 0.03680570721626282, 'imitator_loss': 0.0058309406088665125, 'critic_loss': 10863064554.496, 'actor_loss': -424500.43646875, 'temp': 90.93811766815186, 'temp_loss': -360.59460900878906, 'mmd_loss': 33.20550433158874, 'alpha': 87.37549974060059, 'time_step': 0.04300797700881958, 'td_error': 5720676599.868105, 'value_scale': 443826.2417618399, 'discounted_advantage': -555203.4784510119, 'initial_state': 440653.15625, 'diff_eval': 113455.43779526332} step=81000
2025-12-06 06:04.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_81000.d3


Epoch 82/200: 100%|██████████| 1000/1000 [00:43<00:00, 23.14it/s, imitator_loss=0.0059, critic_loss=1.23e+10, actor_loss=-4.52e+5, temp=101, temp_loss=-397, mmd_loss=36.9, alpha=97.5] 


2025-12-06 06:05.26 [info     ] BEAR_20251206050833: epoch=82 step=82000 epoch=82 metrics={'time_sample_batch': 0.004957431316375732, 'time_algorithm_update': 0.03717332315444946, 'imitator_loss': 0.005904482311801985, 'critic_loss': 12324398429.184, 'actor_loss': -452414.923, 'temp': 101.41930895233155, 'temp_loss': -397.06630654907224, 'mmd_loss': 36.954505920410156, 'alpha': 97.54063676452637, 'time_step': 0.04245150256156922, 'td_error': 6479535338.650518, 'value_scale': 472805.9080050293, 'discounted_advantage': -583679.9716116771, 'initial_state': 469042.25, 'diff_eval': 113452.53057235884} step=82000
2025-12-06 06:05.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_82000.d3


Epoch 83/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.96it/s, imitator_loss=0.00588, critic_loss=1.4e+10, actor_loss=-4.82e+5, temp=113, temp_loss=-437, mmd_loss=41.1, alpha=109]


2025-12-06 06:06.13 [info     ] BEAR_20251206050833: epoch=83 step=83000 epoch=83 metrics={'time_sample_batch': 0.00500825047492981, 'time_algorithm_update': 0.03573561143875122, 'imitator_loss': 0.005879774277564138, 'critic_loss': 13950596224.0, 'actor_loss': -482205.78878125, 'temp': 113.10734986877442, 'temp_loss': -436.9424978942871, 'mmd_loss': 41.10043270492554, 'alpha': 108.88854204559325, 'time_step': 0.04104549288749695, 'td_error': 7389204403.007502, 'value_scale': 503826.87769803015, 'discounted_advantage': -622718.6402585368, 'initial_state': 499486.53125, 'diff_eval': 113454.79303233042} step=83000
2025-12-06 06:06.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_83000.d3


Epoch 84/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.06it/s, imitator_loss=0.0057, critic_loss=1.61e+10, actor_loss=-5.13e+5, temp=126, temp_loss=-481, mmd_loss=45.7, alpha=121]


2025-12-06 06:06.59 [info     ] BEAR_20251206050833: epoch=84 step=84000 epoch=84 metrics={'time_sample_batch': 0.004941091537475586, 'time_algorithm_update': 0.03565004134178162, 'imitator_loss': 0.005695047040237114, 'critic_loss': 16085122820.096, 'actor_loss': -513628.45275, 'temp': 126.13658183288574, 'temp_loss': -480.87264483642576, 'mmd_loss': 45.7140126914978, 'alpha': 121.5533108215332, 'time_step': 0.04089633846282959, 'td_error': 8421679576.54282, 'value_scale': 535846.3282166807, 'discounted_advantage': -675146.8713492249, 'initial_state': 530843.8125, 'diff_eval': 113449.73638445334} step=84000
2025-12-06 06:07.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_84000.d3


Epoch 85/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.95it/s, imitator_loss=0.00556, critic_loss=1.82e+10, actor_loss=-5.46e+5, temp=141, temp_loss=-527, mmd_loss=50.8, alpha=136]


2025-12-06 06:07.47 [info     ] BEAR_20251206050833: epoch=85 step=85000 epoch=85 metrics={'time_sample_batch': 0.005010367631912232, 'time_algorithm_update': 0.03574216771125793, 'imitator_loss': 0.005559850315563381, 'critic_loss': 18192751808.512, 'actor_loss': -546056.4104375, 'temp': 140.65244323730468, 'temp_loss': -527.447636291504, 'mmd_loss': 50.810467460632324, 'alpha': 135.68582339477538, 'time_step': 0.041044918537139895, 'td_error': 9401743850.202791, 'value_scale': 568521.3471945726, 'discounted_advantage': -670208.9503634202, 'initial_state': 563124.0625, 'diff_eval': 113451.33146636783} step=85000
2025-12-06 06:07.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_85000.d3


Epoch 86/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.79it/s, imitator_loss=0.00564, critic_loss=2.06e+10, actor_loss=-5.79e+5, temp=157, temp_loss=-579, mmd_loss=56.5, alpha=151]


2025-12-06 06:08.34 [info     ] BEAR_20251206050833: epoch=86 step=86000 epoch=86 metrics={'time_sample_batch': 0.005091353416442871, 'time_algorithm_update': 0.03589602327346802, 'imitator_loss': 0.005638891402166337, 'critic_loss': 20639433346.048, 'actor_loss': -578981.082875, 'temp': 156.8155546569824, 'temp_loss': -578.937208984375, 'mmd_loss': 56.49360410690308, 'alpha': 151.4581224975586, 'time_step': 0.04131439471244812, 'td_error': 10705560349.454384, 'value_scale': 602346.8335865465, 'discounted_advantage': -766799.6348337848, 'initial_state': 595861.4375, 'diff_eval': 113449.38076083877} step=86000
2025-12-06 06:08.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_86000.d3


Epoch 87/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.15it/s, imitator_loss=0.00568, critic_loss=2.31e+10, actor_loss=-6.13e+5, temp=175, temp_loss=-635, mmd_loss=62.8, alpha=169]


2025-12-06 06:09.21 [info     ] BEAR_20251206050833: epoch=87 step=87000 epoch=87 metrics={'time_sample_batch': 0.004972351551055908, 'time_algorithm_update': 0.035427384614944456, 'imitator_loss': 0.005670251340139657, 'critic_loss': 23093151859.712, 'actor_loss': -613456.8395625, 'temp': 174.83816514587403, 'temp_loss': -635.071466003418, 'mmd_loss': 62.78182336044311, 'alpha': 169.06200117492676, 'time_step': 0.0407027370929718, 'td_error': 11904672915.332819, 'value_scale': 636646.0273208298, 'discounted_advantage': -787360.6806483297, 'initial_state': 629071.6875, 'diff_eval': 113447.80792191562} step=87000
2025-12-06 06:09.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_87000.d3


Epoch 88/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.52it/s, imitator_loss=0.0056, critic_loss=2.59e+10, actor_loss=-6.49e+5, temp=195, temp_loss=-696, mmd_loss=69.7, alpha=189]


2025-12-06 06:10.09 [info     ] BEAR_20251206050833: epoch=88 step=88000 epoch=88 metrics={'time_sample_batch': 0.005134922981262207, 'time_algorithm_update': 0.03638117647171021, 'imitator_loss': 0.005607005935395136, 'critic_loss': 25904415191.04, 'actor_loss': -648903.4826875, 'temp': 194.91408598327638, 'temp_loss': -696.2152481079102, 'mmd_loss': 69.76150625610352, 'alpha': 188.7088400115967, 'time_step': 0.04181762099266052, 'td_error': 13164291000.204575, 'value_scale': 671882.8789815591, 'discounted_advantage': -797190.6467029769, 'initial_state': 663720.0625, 'diff_eval': 113446.17014670232} step=88000
2025-12-06 06:10.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_88000.d3


Epoch 89/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.02it/s, imitator_loss=0.0055, critic_loss=2.9e+10, actor_loss=-6.86e+5, temp=217, temp_loss=-763, mmd_loss=77.5, alpha=211] 


2025-12-06 06:10.56 [info     ] BEAR_20251206050833: epoch=89 step=89000 epoch=89 metrics={'time_sample_batch': 0.0050509550571441655, 'time_algorithm_update': 0.03560954833030701, 'imitator_loss': 0.0054970899634063245, 'critic_loss': 29051876159.488, 'actor_loss': -685776.98475, 'temp': 217.30097734069824, 'temp_loss': -763.6862981567383, 'mmd_loss': 77.56367401123048, 'alpha': 210.64201277160643, 'time_step': 0.040956740856170654, 'td_error': 14820574512.415693, 'value_scale': 709940.982580679, 'discounted_advantage': -867837.7310010536, 'initial_state': 700979.625, 'diff_eval': 113440.06549219353} step=89000
2025-12-06 06:10.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_89000.d3


Epoch 90/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.67it/s, imitator_loss=0.00559, critic_loss=3.25e+10, actor_loss=-7.24e+5, temp=242, temp_loss=-836, mmd_loss=86.1, alpha=235]


2025-12-06 06:11.44 [info     ] BEAR_20251206050833: epoch=90 step=90000 epoch=90 metrics={'time_sample_batch': 0.005119145154953003, 'time_algorithm_update': 0.036075806140899655, 'imitator_loss': 0.005589318410726264, 'critic_loss': 32504127025.152, 'actor_loss': -724635.2566875, 'temp': 242.24438577270507, 'temp_loss': -836.7814521484376, 'mmd_loss': 86.18547127532959, 'alpha': 235.12361700439453, 'time_step': 0.04149941635131836, 'td_error': 16654194504.031603, 'value_scale': 750357.001178751, 'discounted_advantage': -932142.4241019806, 'initial_state': 740883.1875, 'diff_eval': 113443.59880100451} step=90000
2025-12-06 06:11.44 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_90000.d3


Epoch 91/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.82it/s, imitator_loss=0.00546, critic_loss=3.63e+10, actor_loss=-7.65e+5, temp=270, temp_loss=-915, mmd_loss=95.6, alpha=262]


2025-12-06 06:12.31 [info     ] BEAR_20251206050833: epoch=91 step=91000 epoch=91 metrics={'time_sample_batch': 0.005031766653060913, 'time_algorithm_update': 0.03597564482688904, 'imitator_loss': 0.005463616967666894, 'critic_loss': 36270835378.176, 'actor_loss': -764898.569, 'temp': 270.03426617431643, 'temp_loss': -914.9431042480469, 'mmd_loss': 95.6943966140747, 'alpha': 262.4397292022705, 'time_step': 0.041309520244598386, 'td_error': 18467645902.97883, 'value_scale': 790195.8108497486, 'discounted_advantage': -970838.05956939, 'initial_state': 779248.375, 'diff_eval': 113441.6355952785} step=91000
2025-12-06 06:12.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_91000.d3


Epoch 92/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.76it/s, imitator_loss=0.00547, critic_loss=4.03e+10, actor_loss=-8.06e+5, temp=301, temp_loss=-999, mmd_loss=106, alpha=293]


2025-12-06 06:13.19 [info     ] BEAR_20251206050833: epoch=92 step=92000 epoch=92 metrics={'time_sample_batch': 0.005076164007186889, 'time_algorithm_update': 0.03602145075798035, 'imitator_loss': 0.005466929038986564, 'critic_loss': 40342219601.92, 'actor_loss': -805785.6388125, 'temp': 300.9472091064453, 'temp_loss': -999.3334321899414, 'mmd_loss': 106.28049096679688, 'alpha': 292.915655670166, 'time_step': 0.04140125179290771, 'td_error': 20557949822.224487, 'value_scale': 831276.235383487, 'discounted_advantage': -1041032.8223048606, 'initial_state': 819843.125, 'diff_eval': 113438.66404761212} step=92000
2025-12-06 06:13.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_92000.d3


Epoch 93/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.36it/s, imitator_loss=0.00552, critic_loss=4.5e+10, actor_loss=-8.48e+5, temp=335, temp_loss=-1.09e+3, mmd_loss=118, alpha=327]


2025-12-06 06:14.07 [info     ] BEAR_20251206050833: epoch=93 step=93000 epoch=93 metrics={'time_sample_batch': 0.00580710244178772, 'time_algorithm_update': 0.03595282745361328, 'imitator_loss': 0.0055198302490171046, 'critic_loss': 45029410269.184, 'actor_loss': -848329.1140625, 'temp': 335.4178280029297, 'temp_loss': -1092.9532200317383, 'mmd_loss': 118.04387272644043, 'alpha': 326.93565884399413, 'time_step': 0.04207098340988159, 'td_error': 22457228158.584038, 'value_scale': 872948.2260320621, 'discounted_advantage': -1041969.5946844645, 'initial_state': 860297.9375, 'diff_eval': 113436.74610875298} step=93000
2025-12-06 06:14.07 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_93000.d3


Epoch 94/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.87it/s, imitator_loss=0.00529, critic_loss=5.01e+10, actor_loss=-8.92e+5, temp=374, temp_loss=-1.19e+3, mmd_loss=131, alpha=365]


2025-12-06 06:14.54 [info     ] BEAR_20251206050833: epoch=94 step=94000 epoch=94 metrics={'time_sample_batch': 0.0050295450687408445, 'time_algorithm_update': 0.03588840484619141, 'imitator_loss': 0.005292211561230943, 'critic_loss': 50110514800.64, 'actor_loss': -891843.1810625, 'temp': 373.7863827819824, 'temp_loss': -1192.0394869384766, 'mmd_loss': 131.038343208313, 'alpha': 364.8964284057617, 'time_step': 0.041225905656814575, 'td_error': 24955644365.728447, 'value_scale': 917763.9088694467, 'discounted_advantage': -1115276.6268184227, 'initial_state': 904408.0, 'diff_eval': 113444.07349773704} step=94000
2025-12-06 06:14.55 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_94000.d3


Epoch 95/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.90it/s, imitator_loss=0.00536, critic_loss=5.5e+10, actor_loss=-9.37e+5, temp=416, temp_loss=-1.3e+3, mmd_loss=145, alpha=407] 


2025-12-06 06:15.43 [info     ] BEAR_20251206050833: epoch=95 step=95000 epoch=95 metrics={'time_sample_batch': 0.004989765167236328, 'time_algorithm_update': 0.03579526543617249, 'imitator_loss': 0.005356590409995988, 'critic_loss': 54996121575.424, 'actor_loss': -937374.3664375, 'temp': 416.5357899169922, 'temp_loss': -1301.4539083251952, 'mmd_loss': 145.52227418518066, 'alpha': 407.2681454772949, 'time_step': 0.04109455919265747, 'td_error': 27652821305.337765, 'value_scale': 963183.1215423302, 'discounted_advantage': -1194203.491674339, 'initial_state': 948918.0625, 'diff_eval': 113428.6283126281} step=95000
2025-12-06 06:15.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_95000.d3


Epoch 96/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.69it/s, imitator_loss=0.00533, critic_loss=6.11e+10, actor_loss=-9.84e+5, temp=464, temp_loss=-1.42e+3, mmd_loss=162, alpha=454]


2025-12-06 06:16.30 [info     ] BEAR_20251206050833: epoch=96 step=96000 epoch=96 metrics={'time_sample_batch': 0.005035921335220337, 'time_algorithm_update': 0.0360818510055542, 'imitator_loss': 0.005314951474778354, 'critic_loss': 61163919765.504, 'actor_loss': -984249.3985625, 'temp': 464.14898095703126, 'temp_loss': -1419.1602943115233, 'mmd_loss': 161.58993272399903, 'alpha': 454.56262496948244, 'time_step': 0.04144209623336792, 'td_error': 30530805501.53694, 'value_scale': 1010198.3776718357, 'discounted_advantage': -1263342.3402311516, 'initial_state': 994967.5, 'diff_eval': 113427.19358302561} step=96000
2025-12-06 06:16.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_96000.d3


Epoch 97/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.40it/s, imitator_loss=0.00526, critic_loss=6.74e+10, actor_loss=-1.03e+6, temp=517, temp_loss=-1.54e+3, mmd_loss=179, alpha=507]


2025-12-06 06:17.19 [info     ] BEAR_20251206050833: epoch=97 step=97000 epoch=97 metrics={'time_sample_batch': 0.005125319480895996, 'time_algorithm_update': 0.03615214395523071, 'imitator_loss': 0.005260260093724355, 'critic_loss': 67400223023.104, 'actor_loss': -1032183.045875, 'temp': 517.131998626709, 'temp_loss': -1544.2778103027345, 'mmd_loss': 179.36080558776857, 'alpha': 507.33156591796876, 'time_step': 0.04156692743301391, 'td_error': 33535780211.441887, 'value_scale': 1058068.3540444258, 'discounted_advantage': -1328888.1133981096, 'initial_state': 1041729.75, 'diff_eval': 113425.51927763179} step=97000
2025-12-06 06:17.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_97000.d3


Epoch 98/200: 100%|██████████| 1000/1000 [00:40<00:00, 25.00it/s, imitator_loss=0.00523, critic_loss=7.39e+10, actor_loss=-1.08e+6, temp=576, temp_loss=-1.68e+3, mmd_loss=199, alpha=566]


2025-12-06 06:18.04 [info     ] BEAR_20251206050833: epoch=98 step=98000 epoch=98 metrics={'time_sample_batch': 0.0046170675754547115, 'time_algorithm_update': 0.03441406989097595, 'imitator_loss': 0.0052310758621897544, 'critic_loss': 73912560893.952, 'actor_loss': -1081471.5283125, 'temp': 576.1419147949218, 'temp_loss': -1680.183219848633, 'mmd_loss': 199.0914163208008, 'alpha': 566.2306655883789, 'time_step': 0.03931357026100159, 'td_error': 36512015664.12305, 'value_scale': 1107055.9560456832, 'discounted_advantage': -1354291.739595817, 'initial_state': 1089619.25, 'diff_eval': 113415.03830916603} step=98000
2025-12-06 06:18.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_98000.d3


Epoch 99/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.82it/s, imitator_loss=0.00519, critic_loss=8.09e+10, actor_loss=-1.13e+6, temp=642, temp_loss=-1.83e+3, mmd_loss=221, alpha=632]


2025-12-06 06:18.51 [info     ] BEAR_20251206050833: epoch=99 step=99000 epoch=99 metrics={'time_sample_batch': 0.005045348882675171, 'time_algorithm_update': 0.03595014023780823, 'imitator_loss': 0.005195614226628095, 'critic_loss': 80932127715.328, 'actor_loss': -1132814.554625, 'temp': 641.8561823120117, 'temp_loss': -1831.4563024902343, 'mmd_loss': 221.02646032714844, 'alpha': 631.9639945068359, 'time_step': 0.04128550696372986, 'td_error': 40083473025.90893, 'value_scale': 1158396.664763202, 'discounted_advantage': -1423719.2745920334, 'initial_state': 1139725.875, 'diff_eval': 113421.54685807848} step=99000
2025-12-06 06:18.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_99000.d3


Epoch 100/200: 100%|██████████| 1000/1000 [00:44<00:00, 22.72it/s, imitator_loss=0.00521, critic_loss=8.94e+10, actor_loss=-1.19e+6, temp=715, temp_loss=-1.99e+3, mmd_loss=245, alpha=705]


2025-12-06 06:19.41 [info     ] BEAR_20251206050833: epoch=100 step=100000 epoch=100 metrics={'time_sample_batch': 0.005339551687240601, 'time_algorithm_update': 0.03763456606864929, 'imitator_loss': 0.005200597563060001, 'critic_loss': 89391931052.032, 'actor_loss': -1186658.2565, 'temp': 715.067012512207, 'temp_loss': -1989.9293408203125, 'mmd_loss': 245.14532624816894, 'alpha': 705.3099752807617, 'time_step': 0.04328144574165344, 'td_error': 43730309958.245865, 'value_scale': 1211479.3364941324, 'discounted_advantage': -1470168.8653913988, 'initial_state': 1191514.25, 'diff_eval': 113416.02096335066} step=100000
2025-12-06 06:19.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_100000.d3


Epoch 101/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.81it/s, imitator_loss=0.00518, critic_loss=9.75e+10, actor_loss=-1.24e+6, temp=796, temp_loss=-2.16e+3, mmd_loss=272, alpha=787]


2025-12-06 06:20.28 [info     ] BEAR_20251206050833: epoch=101 step=101000 epoch=101 metrics={'time_sample_batch': 0.005051609516143799, 'time_algorithm_update': 0.03596934270858765, 'imitator_loss': 0.005177304522832856, 'critic_loss': 97575264980.992, 'actor_loss': -1242968.231125, 'temp': 796.5662357788086, 'temp_loss': -2165.5404428710935, 'mmd_loss': 272.12995388793945, 'alpha': 787.1540857543945, 'time_step': 0.041322528839111326, 'td_error': 48589395775.30488, 'value_scale': 1268289.6169321039, 'discounted_advantage': -1611130.3287529813, 'initial_state': 1247746.5, 'diff_eval': 113415.25473676044} step=101000
2025-12-06 06:20.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_101000.d3


Epoch 102/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.62it/s, imitator_loss=0.00518, critic_loss=1.07e+11, actor_loss=-1.3e+6, temp=887, temp_loss=-2.35e+3, mmd_loss=302, alpha=878]


2025-12-06 06:21.16 [info     ] BEAR_20251206050833: epoch=102 step=102000 epoch=102 metrics={'time_sample_batch': 0.005132176399230957, 'time_algorithm_update': 0.03620579743385315, 'imitator_loss': 0.005181443769484758, 'critic_loss': 107373476630.528, 'actor_loss': -1301149.5485, 'temp': 887.3110391235351, 'temp_loss': -2350.7448684082033, 'mmd_loss': 302.0238006286621, 'alpha': 878.5139843139649, 'time_step': 0.04164941000938416, 'td_error': 52725233624.59483, 'value_scale': 1326242.5810456832, 'discounted_advantage': -1629089.037282412, 'initial_state': 1304228.375, 'diff_eval': 113403.01448950564} step=102000
2025-12-06 06:21.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_102000.d3


Epoch 103/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.43it/s, imitator_loss=0.00493, critic_loss=1.17e+11, actor_loss=-1.36e+6, temp=988, temp_loss=-2.55e+3, mmd_loss=335, alpha=980]


2025-12-06 06:22.04 [info     ] BEAR_20251206050833: epoch=103 step=103000 epoch=103 metrics={'time_sample_batch': 0.005145523071289062, 'time_algorithm_update': 0.03650919342041015, 'imitator_loss': 0.004930297190556303, 'critic_loss': 117389972135.936, 'actor_loss': -1361439.582875, 'temp': 988.2588212890626, 'temp_loss': -2549.429044433594, 'mmd_loss': 335.2341651306152, 'alpha': 980.5009453735352, 'time_step': 0.04196220064163208, 'td_error': 57598384676.08943, 'value_scale': 1384663.6738788767, 'discounted_advantage': -1719955.667424236, 'initial_state': 1361955.875, 'diff_eval': 113400.20926971307} step=103000
2025-12-06 06:22.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_103000.d3


Epoch 104/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.93it/s, imitator_loss=0.00505, critic_loss=1.29e+11, actor_loss=-1.42e+6, temp=1.1e+3, temp_loss=-2.76e+3, mmd_loss=371, alpha=1.09e+3]


2025-12-06 06:22.51 [info     ] BEAR_20251206050833: epoch=104 step=104000 epoch=104 metrics={'time_sample_batch': 0.005008824348449707, 'time_algorithm_update': 0.035778574705123904, 'imitator_loss': 0.005048700002022088, 'critic_loss': 128817269334.016, 'actor_loss': -1423511.835375, 'temp': 1100.5866029052734, 'temp_loss': -2761.7407963867186, 'mmd_loss': 371.6236690979004, 'alpha': 1094.2575660400391, 'time_step': 0.04109174036979675, 'td_error': 62693667808.15067, 'value_scale': 1446172.0886944677, 'discounted_advantage': -1772788.7374733635, 'initial_state': 1422070.75, 'diff_eval': 113410.7811620797} step=104000
2025-12-06 06:22.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_104000.d3


Epoch 105/200: 100%|██████████| 1000/1000 [00:43<00:00, 23.24it/s, imitator_loss=0.00517, critic_loss=1.4e+11, actor_loss=-1.49e+6, temp=1.22e+3, temp_loss=-2.98e+3, mmd_loss=412, alpha=1.22e+3]


2025-12-06 06:23.40 [info     ] BEAR_20251206050833: epoch=105 step=105000 epoch=105 metrics={'time_sample_batch': 0.005654480934143067, 'time_algorithm_update': 0.036341087818145754, 'imitator_loss': 0.005161544822854921, 'critic_loss': 140539154505.728, 'actor_loss': -1486032.96475, 'temp': 1225.4726630859375, 'temp_loss': -2985.408551025391, 'mmd_loss': 412.1813565979004, 'alpha': 1221.1808471679688, 'time_step': 0.042312971115112306, 'td_error': 68271201327.085266, 'value_scale': 1508584.702116513, 'discounted_advantage': -1849916.702666701, 'initial_state': 1484136.375, 'diff_eval': 113401.68108501578} step=105000
2025-12-06 06:23.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_105000.d3


Epoch 106/200: 100%|██████████| 1000/1000 [00:43<00:00, 23.00it/s, imitator_loss=0.00493, critic_loss=1.53e+11, actor_loss=-1.55e+6, temp=1.36e+3, temp_loss=-3.22e+3, mmd_loss=457, alpha=1.36e+3]


2025-12-06 06:24.29 [info     ] BEAR_20251206050833: epoch=106 step=106000 epoch=106 metrics={'time_sample_batch': 0.005039697885513306, 'time_algorithm_update': 0.037450678586959836, 'imitator_loss': 0.004929120940854773, 'critic_loss': 153081117335.552, 'actor_loss': -1551282.13375, 'temp': 1364.462966796875, 'temp_loss': -3225.8134169921873, 'mmd_loss': 457.0460405883789, 'alpha': 1362.869364868164, 'time_step': 0.04278822898864746, 'td_error': 74236696125.88235, 'value_scale': 1570749.5938285834, 'discounted_advantage': -1938318.3921387913, 'initial_state': 1544484.5, 'diff_eval': 113384.29948751445} step=106000
2025-12-06 06:24.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_106000.d3


Epoch 107/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.40it/s, imitator_loss=0.00495, critic_loss=1.67e+11, actor_loss=-1.61e+6, temp=1.52e+3, temp_loss=-3.47e+3, mmd_loss=506, alpha=1.52e+3]


2025-12-06 06:25.17 [info     ] BEAR_20251206050833: epoch=107 step=107000 epoch=107 metrics={'time_sample_batch': 0.005131041288375855, 'time_algorithm_update': 0.03654501581192016, 'imitator_loss': 0.004950180017622188, 'critic_loss': 167518035582.976, 'actor_loss': -1614908.76925, 'temp': 1518.770262817383, 'temp_loss': -3467.5302541503906, 'mmd_loss': 506.4874181518555, 'alpha': 1520.888939819336, 'time_step': 0.04198987221717834, 'td_error': 79780117281.82484, 'value_scale': 1631344.1082355406, 'discounted_advantage': -1974051.7417387094, 'initial_state': 1603409.875, 'diff_eval': 113380.97086481957} step=107000
2025-12-06 06:25.17 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_107000.d3


Epoch 108/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.40it/s, imitator_loss=0.00489, critic_loss=1.79e+11, actor_loss=-1.68e+6, temp=1.69e+3, temp_loss=-3.72e+3, mmd_loss=561, alpha=1.7e+3]


2025-12-06 06:26.05 [info     ] BEAR_20251206050833: epoch=108 step=108000 epoch=108 metrics={'time_sample_batch': 0.005136399269104004, 'time_algorithm_update': 0.036525469541549685, 'imitator_loss': 0.004893015266861767, 'critic_loss': 179438415446.016, 'actor_loss': -1676273.3, 'temp': 1689.978673828125, 'temp_loss': -3722.0929116210937, 'mmd_loss': 560.9877760620117, 'alpha': 1697.144687866211, 'time_step': 0.041991243124008176, 'td_error': 85856941227.26706, 'value_scale': 1691646.327797569, 'discounted_advantage': -2069007.6556173353, 'initial_state': 1660188.5, 'diff_eval': 113370.25554487511} step=108000
2025-12-06 06:26.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_108000.d3


Epoch 109/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.00it/s, imitator_loss=0.00485, critic_loss=1.94e+11, actor_loss=-1.74e+6, temp=1.88e+3, temp_loss=-3.99e+3, mmd_loss=621, alpha=1.89e+3]


2025-12-06 06:26.52 [info     ] BEAR_20251206050833: epoch=109 step=109000 epoch=109 metrics={'time_sample_batch': 0.0049532814025878905, 'time_algorithm_update': 0.03572441506385803, 'imitator_loss': 0.004851147221168503, 'critic_loss': 193670036348.928, 'actor_loss': -1738959.567, 'temp': 1880.3604107666015, 'temp_loss': -3995.523032714844, 'mmd_loss': 621.6065410766602, 'alpha': 1893.828535522461, 'time_step': 0.04096671056747436, 'td_error': 92461575583.41534, 'value_scale': 1753993.0984388096, 'discounted_advantage': -2181904.342090851, 'initial_state': 1720547.5, 'diff_eval': 113371.10622631754} step=109000
2025-12-06 06:26.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_109000.d3


Epoch 110/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.69it/s, imitator_loss=0.00488, critic_loss=2.08e+11, actor_loss=-1.8e+6, temp=2.09e+3, temp_loss=-4.29e+3, mmd_loss=689, alpha=2.11e+3]


2025-12-06 06:27.40 [info     ] BEAR_20251206050833: epoch=110 step=110000 epoch=110 metrics={'time_sample_batch': 0.005115922451019287, 'time_algorithm_update': 0.036089999914169314, 'imitator_loss': 0.00487488708877936, 'critic_loss': 207927577747.456, 'actor_loss': -1804341.033125, 'temp': 2092.221609863281, 'temp_loss': -4291.474390625, 'mmd_loss': 688.9359748535156, 'alpha': 2113.4028732910156, 'time_step': 0.04151187252998352, 'td_error': 99397578322.99725, 'value_scale': 1819675.432523051, 'discounted_advantage': -2236691.1147088753, 'initial_state': 1783926.375, 'diff_eval': 113365.04749890925} step=110000
2025-12-06 06:27.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_110000.d3


Epoch 111/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.02it/s, imitator_loss=0.00481, critic_loss=2.24e+11, actor_loss=-1.87e+6, temp=2.33e+3, temp_loss=-4.61e+3, mmd_loss=763, alpha=2.36e+3]


2025-12-06 06:28.27 [info     ] BEAR_20251206050833: epoch=111 step=111000 epoch=111 metrics={'time_sample_batch': 0.005046823740005493, 'time_algorithm_update': 0.03561275863647461, 'imitator_loss': 0.004812421415466815, 'critic_loss': 224410033029.12, 'actor_loss': -1873137.996, 'temp': 2327.8348674316408, 'temp_loss': -4608.979207275391, 'mmd_loss': 763.4537559204101, 'alpha': 2358.380895019531, 'time_step': 0.040951040744781494, 'td_error': 106533988124.8833, 'value_scale': 1886644.71898575, 'discounted_advantage': -2286669.6737585715, 'initial_state': 1849827.875, 'diff_eval': 113343.63965274782} step=111000
2025-12-06 06:28.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_111000.d3


Epoch 112/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.03it/s, imitator_loss=0.0048, critic_loss=2.42e+11, actor_loss=-1.94e+6, temp=2.59e+3, temp_loss=-4.93e+3, mmd_loss=846, alpha=2.63e+3]


2025-12-06 06:29.14 [info     ] BEAR_20251206050833: epoch=112 step=112000 epoch=112 metrics={'time_sample_batch': 0.005006209373474121, 'time_algorithm_update': 0.03560266137123108, 'imitator_loss': 0.004795974859269336, 'critic_loss': 242273848983.552, 'actor_loss': -1943665.604125, 'temp': 2589.6704147949217, 'temp_loss': -4930.250284667969, 'mmd_loss': 845.9976715698242, 'alpha': 2631.773842529297, 'time_step': 0.04092056035995483, 'td_error': 114383082226.92188, 'value_scale': 1955540.2659262363, 'discounted_advantage': -2359563.0606786413, 'initial_state': 1916618.5, 'diff_eval': 113351.05211926546} step=112000
2025-12-06 06:29.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_112000.d3


Epoch 113/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.68it/s, imitator_loss=0.00479, critic_loss=2.6e+11, actor_loss=-2.01e+6, temp=2.88e+3, temp_loss=-5.25e+3, mmd_loss=936, alpha=2.94e+3]


2025-12-06 06:30.02 [info     ] BEAR_20251206050833: epoch=113 step=113000 epoch=113 metrics={'time_sample_batch': 0.005128426790237427, 'time_algorithm_update': 0.03611372947692871, 'imitator_loss': 0.004785933402366936, 'critic_loss': 260183698636.8, 'actor_loss': -2013296.332625, 'temp': 2879.927528564453, 'temp_loss': -5246.506309082031, 'mmd_loss': 936.6480331420898, 'alpha': 2936.739164794922, 'time_step': 0.04153905177116394, 'td_error': 122676620445.11057, 'value_scale': 2023968.849958089, 'discounted_advantage': -2460918.8101133695, 'initial_state': 1982587.375, 'diff_eval': 113317.97684501142} step=113000
2025-12-06 06:30.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_113000.d3


Epoch 114/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.03it/s, imitator_loss=0.00478, critic_loss=2.78e+11, actor_loss=-2.08e+6, temp=3.2e+3, temp_loss=-5.57e+3, mmd_loss=1.04e+3, alpha=3.28e+3]


2025-12-06 06:30.49 [info     ] BEAR_20251206050833: epoch=114 step=114000 epoch=114 metrics={'time_sample_batch': 0.005030077219009399, 'time_algorithm_update': 0.03560449337959289, 'imitator_loss': 0.004774301931494847, 'critic_loss': 277899858542.592, 'actor_loss': -2083985.173125, 'temp': 3201.6337060546875, 'temp_loss': -5567.887485839844, 'mmd_loss': 1036.613151977539, 'alpha': 3277.001092529297, 'time_step': 0.04093356156349182, 'td_error': 130209945241.96149, 'value_scale': 2088899.8325649623, 'discounted_advantage': -2521474.0849861014, 'initial_state': 2043966.75, 'diff_eval': 113334.08936732533} step=114000
2025-12-06 06:30.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_114000.d3


Epoch 115/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.67it/s, imitator_loss=0.00465, critic_loss=2.99e+11, actor_loss=-2.15e+6, temp=3.56e+3, temp_loss=-5.9e+3, mmd_loss=1.15e+3, alpha=3.65e+3]


2025-12-06 06:31.37 [info     ] BEAR_20251206050833: epoch=115 step=115000 epoch=115 metrics={'time_sample_batch': 0.005130046367645263, 'time_algorithm_update': 0.03614102149009705, 'imitator_loss': 0.004652205034391955, 'critic_loss': 299024048488.448, 'actor_loss': -2154893.19475, 'temp': 3558.0884660644533, 'temp_loss': -5901.1861499023435, 'mmd_loss': 1147.8877137451173, 'alpha': 3656.673286376953, 'time_step': 0.04156978678703308, 'td_error': 140160252345.41742, 'value_scale': 2160742.5555322715, 'discounted_advantage': -2686740.1042895587, 'initial_state': 2112956.25, 'diff_eval': 113337.29459431625} step=115000
2025-12-06 06:31.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_115000.d3


Epoch 116/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.81it/s, imitator_loss=0.00479, critic_loss=3.18e+11, actor_loss=-2.23e+6, temp=3.95e+3, temp_loss=-6.26e+3, mmd_loss=1.27e+3, alpha=4.08e+3]


2025-12-06 06:32.24 [info     ] BEAR_20251206050833: epoch=116 step=116000 epoch=116 metrics={'time_sample_batch': 0.005059609889984131, 'time_algorithm_update': 0.035943608283996585, 'imitator_loss': 0.004788625659421087, 'critic_loss': 318413220872.192, 'actor_loss': -2227880.1675, 'temp': 3954.1297158203124, 'temp_loss': -6261.99285546875, 'mmd_loss': 1270.1550172119141, 'alpha': 4080.230017578125, 'time_step': 0.041306036949157715, 'td_error': 149907867215.90283, 'value_scale': 2232489.9226739313, 'discounted_advantage': -2794042.9249327676, 'initial_state': 2182181.25, 'diff_eval': 113321.48501199369} step=116000
2025-12-06 06:32.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_116000.d3


Epoch 117/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.59it/s, imitator_loss=0.00472, critic_loss=3.4e+11, actor_loss=-2.3e+6, temp=4.39e+3, temp_loss=-6.6e+3, mmd_loss=1.41e+3, alpha=4.55e+3]  


2025-12-06 06:33.12 [info     ] BEAR_20251206050833: epoch=117 step=117000 epoch=117 metrics={'time_sample_batch': 0.005155872344970703, 'time_algorithm_update': 0.03622398138046265, 'imitator_loss': 0.004713266474893316, 'critic_loss': 339863139794.944, 'actor_loss': -2302912.07075, 'temp': 4393.079875488282, 'temp_loss': -6597.728480957031, 'mmd_loss': 1406.0523526611328, 'alpha': 4552.964641113282, 'time_step': 0.041684588432312014, 'td_error': 158466247052.42612, 'value_scale': 2305973.6391450125, 'discounted_advantage': -2818221.9475711877, 'initial_state': 2253252.75, 'diff_eval': 113326.64124091533} step=117000
2025-12-06 06:33.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_117000.d3


Epoch 118/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.65it/s, imitator_loss=0.00476, critic_loss=3.64e+11, actor_loss=-2.38e+6, temp=4.88e+3, temp_loss=-6.92e+3, mmd_loss=1.55e+3, alpha=5.08e+3]


2025-12-06 06:34.00 [info     ] BEAR_20251206050833: epoch=118 step=118000 epoch=118 metrics={'time_sample_batch': 0.005107701063156128, 'time_algorithm_update': 0.03620189166069031, 'imitator_loss': 0.0047614034693688155, 'critic_loss': 364130757066.752, 'actor_loss': -2379619.6935, 'temp': 4878.801921875, 'temp_loss': -6918.994801269531, 'mmd_loss': 1554.4300539550782, 'alpha': 5080.373522949219, 'time_step': 0.0416047694683075, 'td_error': 168650132905.5737, 'value_scale': 2379633.5123637887, 'discounted_advantage': -2873070.2233045483, 'initial_state': 2323395.25, 'diff_eval': 113293.75976085423} step=118000
2025-12-06 06:34.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_118000.d3


Epoch 119/200: 100%|██████████| 1000/1000 [00:43<00:00, 22.82it/s, imitator_loss=0.00457, critic_loss=3.89e+11, actor_loss=-2.46e+6, temp=5.41e+3, temp_loss=-7.25e+3, mmd_loss=1.72e+3, alpha=5.67e+3]


2025-12-06 06:34.49 [info     ] BEAR_20251206050833: epoch=119 step=119000 epoch=119 metrics={'time_sample_batch': 0.0052375333309173586, 'time_algorithm_update': 0.03753627681732178, 'imitator_loss': 0.004566560953622684, 'critic_loss': 388917704753.152, 'actor_loss': -2459158.3105, 'temp': 5415.891503417969, 'temp_loss': -7253.229844726562, 'mmd_loss': 1721.082278930664, 'alpha': 5668.635168457031, 'time_step': 0.04308942937850952, 'td_error': 180374685974.70282, 'value_scale': 2459293.4601844093, 'discounted_advantage': -3025705.352768725, 'initial_state': 2401299.75, 'diff_eval': 113293.04817857126} step=119000
2025-12-06 06:34.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_119000.d3


Epoch 120/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.60it/s, imitator_loss=0.00469, critic_loss=4.11e+11, actor_loss=-2.54e+6, temp=6.01e+3, temp_loss=-7.53e+3, mmd_loss=1.9e+3, alpha=6.32e+3]


2025-12-06 06:35.35 [info     ] BEAR_20251206050833: epoch=120 step=120000 epoch=120 metrics={'time_sample_batch': 0.0046785190105438235, 'time_algorithm_update': 0.034980876207351685, 'imitator_loss': 0.00469165923842229, 'critic_loss': 411294584274.944, 'actor_loss': -2539770.57025, 'temp': 6010.391095703125, 'temp_loss': -7534.545231445312, 'mmd_loss': 1902.7866379394532, 'alpha': 6325.213758789063, 'time_step': 0.03994916105270386, 'td_error': 192267028662.87943, 'value_scale': 2538249.246018441, 'discounted_advantage': -3148265.472966629, 'initial_state': 2478117.5, 'diff_eval': 113293.49840479785} step=120000
2025-12-06 06:35.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_120000.d3


Epoch 121/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.36it/s, imitator_loss=0.00461, critic_loss=4.42e+11, actor_loss=-2.62e+6, temp=6.66e+3, temp_loss=-7.85e+3, mmd_loss=2.1e+3, alpha=7.05e+3]


2025-12-06 06:36.24 [info     ] BEAR_20251206050833: epoch=121 step=121000 epoch=121 metrics={'time_sample_batch': 0.005053613424301147, 'time_algorithm_update': 0.03674440813064575, 'imitator_loss': 0.00461946642328985, 'critic_loss': 442571017748.48, 'actor_loss': -2621970.797, 'temp': 6666.850309082031, 'temp_loss': -7855.440638183593, 'mmd_loss': 2105.6288420410156, 'alpha': 7057.020958007813, 'time_step': 0.04210600924491882, 'td_error': 205999379323.5233, 'value_scale': 2621042.6608340316, 'discounted_advantage': -3306270.2391538355, 'initial_state': 2558268.5, 'diff_eval': 113288.55092427078} step=121000
2025-12-06 06:36.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_121000.d3


Epoch 122/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.23it/s, imitator_loss=0.00463, critic_loss=4.72e+11, actor_loss=-2.71e+6, temp=7.39e+3, temp_loss=-8.2e+3, mmd_loss=2.33e+3, alpha=7.87e+3]


2025-12-06 06:37.10 [info     ] BEAR_20251206050833: epoch=122 step=122000 epoch=122 metrics={'time_sample_batch': 0.004959917306900025, 'time_algorithm_update': 0.03531701135635376, 'imitator_loss': 0.004624232338741422, 'critic_loss': 472279420764.16, 'actor_loss': -2714361.89275, 'temp': 7396.188721191406, 'temp_loss': -8203.116865722655, 'mmd_loss': 2329.6699494628906, 'alpha': 7874.009979003907, 'time_step': 0.04057030248641968, 'td_error': 221510726562.32852, 'value_scale': 2715854.2869865885, 'discounted_advantage': -3449481.642459235, 'initial_state': 2653206.25, 'diff_eval': 113268.00672521215} step=122000
2025-12-06 06:37.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_122000.d3


Epoch 123/200: 100%|██████████| 1000/1000 [00:43<00:00, 22.83it/s, imitator_loss=0.00454, critic_loss=5.02e+11, actor_loss=-2.81e+6, temp=8.2e+3, temp_loss=-8.39e+3, mmd_loss=2.57e+3, alpha=8.78e+3]


2025-12-06 06:38.00 [info     ] BEAR_20251206050833: epoch=123 step=123000 epoch=123 metrics={'time_sample_batch': 0.005313608646392822, 'time_algorithm_update': 0.03743499207496643, 'imitator_loss': 0.00453970816032961, 'critic_loss': 502149755994.112, 'actor_loss': -2810263.709, 'temp': 8200.3871171875, 'temp_loss': -8387.9116796875, 'mmd_loss': 2575.5750002441405, 'alpha': 8785.53904296875, 'time_step': 0.04305995297431946, 'td_error': 235149685987.0016, 'value_scale': 2804936.2691743504, 'discounted_advantage': -3491796.2851012894, 'initial_state': 2740612.5, 'diff_eval': 113298.14998288406} step=123000
2025-12-06 06:38.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_123000.d3


Epoch 124/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.73it/s, imitator_loss=0.00453, critic_loss=5.44e+11, actor_loss=-2.9e+6, temp=9.07e+3, temp_loss=-8.4e+3, mmd_loss=2.84e+3, alpha=9.8e+3]  


2025-12-06 06:38.47 [info     ] BEAR_20251206050833: epoch=124 step=124000 epoch=124 metrics={'time_sample_batch': 0.005086101055145263, 'time_algorithm_update': 0.03607327651977539, 'imitator_loss': 0.004533164410386235, 'critic_loss': 543377373462.528, 'actor_loss': -2903118.994, 'temp': 9076.261323242188, 'temp_loss': -8406.212863769531, 'mmd_loss': 2839.5185939941407, 'alpha': 9801.399275390624, 'time_step': 0.04145991325378418, 'td_error': 250205212788.2584, 'value_scale': 2894163.375209556, 'discounted_advantage': -3594678.8932406185, 'initial_state': 2828018.5, 'diff_eval': 113242.05263430395} step=124000
2025-12-06 06:38.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_124000.d3


Epoch 125/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.67it/s, imitator_loss=0.00447, critic_loss=5.7e+11, actor_loss=-2.99e+6, temp=1e+4, temp_loss=-8.26e+3, mmd_loss=3.13e+3, alpha=1.09e+4]   


2025-12-06 06:39.35 [info     ] BEAR_20251206050833: epoch=125 step=125000 epoch=125 metrics={'time_sample_batch': 0.005095398187637329, 'time_algorithm_update': 0.03611338710784912, 'imitator_loss': 0.0044679289031773805, 'critic_loss': 569514372694.016, 'actor_loss': -2992019.0095, 'temp': 10028.004185546875, 'temp_loss': -8264.139987304687, 'mmd_loss': 3127.7006323242185, 'alpha': 10932.824249023437, 'time_step': 0.04152611947059631, 'td_error': 264863361347.44284, 'value_scale': 2980724.6114836545, 'discounted_advantage': -3717835.1760421554, 'initial_state': 2912786.5, 'diff_eval': 113266.45161494265} step=125000
2025-12-06 06:39.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_125000.d3


Epoch 126/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.59it/s, imitator_loss=0.00462, critic_loss=6.04e+11, actor_loss=-3.08e+6, temp=1.11e+4, temp_loss=-7.95e+3, mmd_loss=3.44e+3, alpha=1.22e+4]


2025-12-06 06:40.23 [info     ] BEAR_20251206050833: epoch=126 step=126000 epoch=126 metrics={'time_sample_batch': 0.005141024589538574, 'time_algorithm_update': 0.03624909567832947, 'imitator_loss': 0.0046235135397873816, 'critic_loss': 603754707746.816, 'actor_loss': -3075696.3835, 'temp': 11055.866599609375, 'temp_loss': -7951.488776367188, 'mmd_loss': 3442.6920791015623, 'alpha': 12193.802024414063, 'time_step': 0.041693255662918094, 'td_error': 279323369795.6043, 'value_scale': 3061565.703792959, 'discounted_advantage': -3827943.523314871, 'initial_state': 2989702.5, 'diff_eval': 113253.93100998807} step=126000
2025-12-06 06:40.23 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_126000.d3


Epoch 127/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.71it/s, imitator_loss=0.00454, critic_loss=6.36e+11, actor_loss=-3.16e+6, temp=1.22e+4, temp_loss=-7.5e+3, mmd_loss=3.79e+3, alpha=1.36e+4]


2025-12-06 06:41.10 [info     ] BEAR_20251206050833: epoch=127 step=127000 epoch=127 metrics={'time_sample_batch': 0.0050905213356018065, 'time_algorithm_update': 0.03610607099533081, 'imitator_loss': 0.004546030062949285, 'critic_loss': 635559203143.68, 'actor_loss': -3159207.46975, 'temp': 12165.846762695313, 'temp_loss': -7498.838076171875, 'mmd_loss': 3791.9305837402344, 'alpha': 13599.622240234376, 'time_step': 0.041496143341064454, 'td_error': 291045580072.60657, 'value_scale': 3140589.603415759, 'discounted_advantage': -3819156.766359888, 'initial_state': 3065315.25, 'diff_eval': 113270.14062794176} step=127000
2025-12-06 06:41.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_127000.d3


Epoch 128/200: 100%|██████████| 1000/1000 [00:43<00:00, 23.23it/s, imitator_loss=0.00443, critic_loss=6.66e+11, actor_loss=-3.24e+6, temp=1.33e+4, temp_loss=-6.67e+3, mmd_loss=4.16e+3, alpha=1.52e+4]


2025-12-06 06:41.59 [info     ] BEAR_20251206050833: epoch=128 step=128000 epoch=128 metrics={'time_sample_batch': 0.0052012019157409665, 'time_algorithm_update': 0.036836248636245725, 'imitator_loss': 0.004433201462496072, 'critic_loss': 666239984926.72, 'actor_loss': -3237486.38225, 'temp': 13335.213805664063, 'temp_loss': -6668.06948449707, 'mmd_loss': 4164.048759277343, 'alpha': 15165.676909179687, 'time_step': 0.042349877834320065, 'td_error': 306891256569.0605, 'value_scale': 3216800.78248114, 'discounted_advantage': -4031603.9900732846, 'initial_state': 3136657.5, 'diff_eval': 113277.272713136} step=128000
2025-12-06 06:41.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_128000.d3


Epoch 129/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.69it/s, imitator_loss=0.0046, critic_loss=7e+11, actor_loss=-3.32e+6, temp=1.46e+4, temp_loss=-5.85e+3, mmd_loss=4.57e+3, alpha=1.69e+4]   


2025-12-06 06:42.46 [info     ] BEAR_20251206050833: epoch=129 step=129000 epoch=129 metrics={'time_sample_batch': 0.005076265811920166, 'time_algorithm_update': 0.03612989950180054, 'imitator_loss': 0.004604719994356856, 'critic_loss': 700299720294.4, 'actor_loss': -3316884.3285, 'temp': 14562.671561523437, 'temp_loss': -5847.488609375, 'mmd_loss': 4572.276127441406, 'alpha': 16909.798673828125, 'time_step': 0.04151052021980286, 'td_error': 322613900828.7779, 'value_scale': 3297360.5850796313, 'discounted_advantage': -4181025.8136185594, 'initial_state': 3212250.5, 'diff_eval': 113299.52894864717} step=129000
2025-12-06 06:42.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_129000.d3


Epoch 130/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.97it/s, imitator_loss=0.00444, critic_loss=7.32e+11, actor_loss=-3.41e+6, temp=1.59e+4, temp_loss=-5.39e+3, mmd_loss=5.05e+3, alpha=1.88e+4]


2025-12-06 06:43.34 [info     ] BEAR_20251206050833: epoch=130 step=130000 epoch=130 metrics={'time_sample_batch': 0.0050034248828887936, 'time_algorithm_update': 0.03572329545021057, 'imitator_loss': 0.004436919912463054, 'critic_loss': 732852925136.896, 'actor_loss': -3407163.4895, 'temp': 15893.1706875, 'temp_loss': -5387.851362182617, 'mmd_loss': 5050.200893554687, 'alpha': 18858.924734375, 'time_step': 0.04101873135566712, 'td_error': 342932296603.78937, 'value_scale': 3396030.8746856665, 'discounted_advantage': -4302219.436411872, 'initial_state': 3309205.75, 'diff_eval': 113286.77154675314} step=130000
2025-12-06 06:43.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_130000.d3


Epoch 131/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.68it/s, imitator_loss=0.00432, critic_loss=7.8e+11, actor_loss=-3.51e+6, temp=1.73e+4, temp_loss=-4.44e+3, mmd_loss=5.56e+3, alpha=2.1e+4] 


2025-12-06 06:44.21 [info     ] BEAR_20251206050833: epoch=131 step=131000 epoch=131 metrics={'time_sample_batch': 0.005137337923049927, 'time_algorithm_update': 0.036101429224014284, 'imitator_loss': 0.004318612878676504, 'critic_loss': 780009710714.88, 'actor_loss': -3508491.2895, 'temp': 17279.602125, 'temp_loss': -4423.261363677979, 'mmd_loss': 5564.433452148438, 'alpha': 21028.73065234375, 'time_step': 0.041534825563430786, 'td_error': 362575140612.1173, 'value_scale': 3500810.427808047, 'discounted_advantage': -4367142.625499299, 'initial_state': 3414028.75, 'diff_eval': 113288.67134451086} step=131000
2025-12-06 06:44.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_131000.d3


Epoch 132/200: 100%|██████████| 1000/1000 [00:43<00:00, 23.18it/s, imitator_loss=0.00438, critic_loss=8.29e+11, actor_loss=-3.61e+6, temp=1.86e+4, temp_loss=-3.29e+3, mmd_loss=5.76e+3, alpha=2.2e+4]


2025-12-06 06:45.10 [info     ] BEAR_20251206050833: epoch=132 step=132000 epoch=132 metrics={'time_sample_batch': 0.005100691080093383, 'time_algorithm_update': 0.03701282048225403, 'imitator_loss': 0.004379937552148477, 'critic_loss': 828720373202.944, 'actor_loss': -3615183.58775, 'temp': 18627.95508203125, 'temp_loss': -3284.057089050293, 'mmd_loss': 5755.332157714844, 'alpha': 22026.466796875, 'time_step': 0.04241804051399231, 'td_error': 386323265536.38074, 'value_scale': 3602607.7318734284, 'discounted_advantage': -4622341.366350574, 'initial_state': 3513528.25, 'diff_eval': 113291.79977828839} step=132000
2025-12-06 06:45.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_132000.d3


Epoch 133/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.79it/s, imitator_loss=0.00445, critic_loss=8.83e+11, actor_loss=-3.72e+6, temp=1.99e+4, temp_loss=-2.24e+3, mmd_loss=5.7e+3, alpha=2.2e+4]


2025-12-06 06:45.57 [info     ] BEAR_20251206050833: epoch=133 step=133000 epoch=133 metrics={'time_sample_batch': 0.0049985449314117435, 'time_algorithm_update': 0.036031803369522095, 'imitator_loss': 0.00445354518480599, 'critic_loss': 884151499161.6, 'actor_loss': -3723625.389, 'temp': 19888.43858984375, 'temp_loss': -2223.4527490844725, 'mmd_loss': 5695.985974609375, 'alpha': 22026.466796875, 'time_step': 0.04133530879020691, 'td_error': 408322149166.6899, 'value_scale': 3707005.8646269906, 'discounted_advantage': -4711796.4068793105, 'initial_state': 3614793.5, 'diff_eval': 113285.82461560641} step=133000
2025-12-06 06:45.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_133000.d3


Epoch 134/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.97it/s, imitator_loss=0.00437, critic_loss=9.27e+11, actor_loss=-3.83e+6, temp=2.09e+4, temp_loss=-1.59e+3, mmd_loss=5.65e+3, alpha=2.2e+4]


2025-12-06 06:46.44 [info     ] BEAR_20251206050833: epoch=134 step=134000 epoch=134 metrics={'time_sample_batch': 0.0049914798736572265, 'time_algorithm_update': 0.035752527236938475, 'imitator_loss': 0.004370257509639487, 'critic_loss': 926677390983.168, 'actor_loss': -3831201.238, 'temp': 20895.1941328125, 'temp_loss': -1584.7307577667236, 'mmd_loss': 5652.876116699219, 'alpha': 22026.466796875, 'time_step': 0.04104473638534546, 'td_error': 432559452123.97455, 'value_scale': 3817996.714270746, 'discounted_advantage': -4842468.122328786, 'initial_state': 3724174.75, 'diff_eval': 113313.92846155286} step=134000
2025-12-06 06:46.44 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_134000.d3


Epoch 135/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.69it/s, imitator_loss=0.00437, critic_loss=9.82e+11, actor_loss=-3.95e+6, temp=2.18e+4, temp_loss=-1e+3, mmd_loss=5.63e+3, alpha=2.2e+4]  


2025-12-06 06:47.32 [info     ] BEAR_20251206050833: epoch=135 step=135000 epoch=135 metrics={'time_sample_batch': 0.0050338592529296875, 'time_algorithm_update': 0.03617303824424744, 'imitator_loss': 0.0043681166742462665, 'critic_loss': 982328695783.424, 'actor_loss': -3947204.619, 'temp': 21801.80403125, 'temp_loss': -1003.8476024017334, 'mmd_loss': 5627.1574135742185, 'alpha': 22026.466796875, 'time_step': 0.041509641885757446, 'td_error': 460555475747.89185, 'value_scale': 3932251.018860017, 'discounted_advantage': -5047181.030023833, 'initial_state': 3834767.75, 'diff_eval': 113277.029779439} step=135000
2025-12-06 06:47.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_135000.d3


Epoch 136/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.58it/s, imitator_loss=0.00433, critic_loss=1.05e+12, actor_loss=-4.07e+6, temp=2.27e+4, temp_loss=-1.12e+3, mmd_loss=5.62e+3, alpha=2.2e+4]


2025-12-06 06:48.20 [info     ] BEAR_20251206050833: epoch=136 step=136000 epoch=136 metrics={'time_sample_batch': 0.0051424834728240965, 'time_algorithm_update': 0.03625483345985413, 'imitator_loss': 0.004335878737969324, 'critic_loss': 1047458048311.296, 'actor_loss': -4069568.6405, 'temp': 22684.32298046875, 'temp_loss': -1106.844568572998, 'mmd_loss': 5622.292816894531, 'alpha': 22026.466796875, 'time_step': 0.04171240162849426, 'td_error': 492035470320.4254, 'value_scale': 4057491.7111274097, 'discounted_advantage': -5246281.417782627, 'initial_state': 3959629.0, 'diff_eval': 113245.2365525942} step=136000
2025-12-06 06:48.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_136000.d3


Epoch 137/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.12it/s, imitator_loss=0.00429, critic_loss=1.11e+12, actor_loss=-4.2e+6, temp=2.36e+4, temp_loss=-811, mmd_loss=5.61e+3, alpha=2.2e+4]    


2025-12-06 06:49.07 [info     ] BEAR_20251206050833: epoch=137 step=137000 epoch=137 metrics={'time_sample_batch': 0.005008605718612671, 'time_algorithm_update': 0.03551613640785217, 'imitator_loss': 0.004288090796675533, 'critic_loss': 1113717307867.136, 'actor_loss': -4198018.221, 'temp': 23574.1909609375, 'temp_loss': -825.4634174194335, 'mmd_loss': 5610.614853515625, 'alpha': 22026.466796875, 'time_step': 0.040813069581985474, 'td_error': 521354301785.2021, 'value_scale': 4185302.5832984075, 'discounted_advantage': -5319392.698585905, 'initial_state': 4086329.5, 'diff_eval': 113206.48915042542} step=137000
2025-12-06 06:49.07 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_137000.d3


Epoch 138/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.65it/s, imitator_loss=0.00425, critic_loss=1.19e+12, actor_loss=-4.33e+6, temp=2.43e+4, temp_loss=-697, mmd_loss=5.61e+3, alpha=2.2e+4] 


2025-12-06 06:49.54 [info     ] BEAR_20251206050833: epoch=138 step=138000 epoch=138 metrics={'time_sample_batch': 0.005103869438171387, 'time_algorithm_update': 0.03620009136199951, 'imitator_loss': 0.004261071336222813, 'critic_loss': 1187258769539.072, 'actor_loss': -4327737.383, 'temp': 24325.877263671875, 'temp_loss': -688.5678579864502, 'mmd_loss': 5605.439520996094, 'alpha': 22026.466796875, 'time_step': 0.04159707522392273, 'td_error': 557478952501.987, 'value_scale': 4313284.902242246, 'discounted_advantage': -5606737.619298962, 'initial_state': 4213542.5, 'diff_eval': 113232.71802245121} step=138000
2025-12-06 06:49.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_138000.d3


Epoch 139/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.96it/s, imitator_loss=0.00423, critic_loss=1.26e+12, actor_loss=-4.46e+6, temp=2.49e+4, temp_loss=-540, mmd_loss=5.6e+3, alpha=2.2e+4] 


2025-12-06 06:50.41 [info     ] BEAR_20251206050833: epoch=139 step=139000 epoch=139 metrics={'time_sample_batch': 0.005036304473876953, 'time_algorithm_update': 0.035718047857284545, 'imitator_loss': 0.004229516125749797, 'critic_loss': 1264326117818.368, 'actor_loss': -4458301.7135, 'temp': 24945.871478515626, 'temp_loss': -544.5902738723755, 'mmd_loss': 5602.865099121093, 'alpha': 22026.466796875, 'time_step': 0.04105446243286133, 'td_error': 585250792193.0842, 'value_scale': 4438297.903918692, 'discounted_advantage': -5619285.60096698, 'initial_state': 4337091.0, 'diff_eval': 113274.22235844142} step=139000
2025-12-06 06:50.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_139000.d3


Epoch 140/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.97it/s, imitator_loss=0.00417, critic_loss=1.33e+12, actor_loss=-4.59e+6, temp=2.56e+4, temp_loss=-753, mmd_loss=5.61e+3, alpha=2.2e+4]   


2025-12-06 06:51.29 [info     ] BEAR_20251206050833: epoch=140 step=140000 epoch=140 metrics={'time_sample_batch': 0.004945532083511352, 'time_algorithm_update': 0.035743357419967654, 'imitator_loss': 0.004171499205287546, 'critic_loss': 1334357905768.448, 'actor_loss': -4586323.749, 'temp': 25579.209541015625, 'temp_loss': -754.5755587005615, 'mmd_loss': 5611.243708496094, 'alpha': 22026.466796875, 'time_step': 0.04099014735221863, 'td_error': 619616859983.2659, 'value_scale': 4566548.868084661, 'discounted_advantage': -5759870.176740079, 'initial_state': 4463030.5, 'diff_eval': 113232.43968751944} step=140000
2025-12-06 06:51.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_140000.d3


Epoch 141/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.91it/s, imitator_loss=0.00423, critic_loss=1.41e+12, actor_loss=-4.72e+6, temp=2.63e+4, temp_loss=-674, mmd_loss=5.61e+3, alpha=2.2e+4] 


2025-12-06 06:52.16 [info     ] BEAR_20251206050833: epoch=141 step=141000 epoch=141 metrics={'time_sample_batch': 0.005083507299423218, 'time_algorithm_update': 0.03574545001983643, 'imitator_loss': 0.004235498168505728, 'critic_loss': 1409694474829.824, 'actor_loss': -4717615.828, 'temp': 26320.043611328125, 'temp_loss': -677.2329464111328, 'mmd_loss': 5609.798395019531, 'alpha': 22026.466796875, 'time_step': 0.04112268972396851, 'td_error': 659598694606.5958, 'value_scale': 4698411.349224644, 'discounted_advantage': -6040915.596506225, 'initial_state': 4593094.5, 'diff_eval': 113237.66923334933} step=141000
2025-12-06 06:52.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_141000.d3


Epoch 142/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.83it/s, imitator_loss=0.00424, critic_loss=1.49e+12, actor_loss=-4.85e+6, temp=2.7e+4, temp_loss=-918, mmd_loss=5.62e+3, alpha=2.2e+4]    


2025-12-06 06:53.03 [info     ] BEAR_20251206050833: epoch=142 step=142000 epoch=142 metrics={'time_sample_batch': 0.00510168981552124, 'time_algorithm_update': 0.03588097858428955, 'imitator_loss': 0.004238695214502514, 'critic_loss': 1487587045081.088, 'actor_loss': -4854359.697, 'temp': 27028.669611328125, 'temp_loss': -916.3497620239258, 'mmd_loss': 5616.510809570313, 'alpha': 22026.466796875, 'time_step': 0.04127690577507019, 'td_error': 696096653754.5201, 'value_scale': 4836203.17917016, 'discounted_advantage': -6133249.724843604, 'initial_state': 4729291.0, 'diff_eval': 113289.80649440666} step=142000
2025-12-06 06:53.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_142000.d3


Epoch 143/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.78it/s, imitator_loss=0.00419, critic_loss=1.59e+12, actor_loss=-5e+6, temp=2.8e+4, temp_loss=-828, mmd_loss=5.61e+3, alpha=2.2e+4]       


2025-12-06 06:53.51 [info     ] BEAR_20251206050833: epoch=143 step=143000 epoch=143 metrics={'time_sample_batch': 0.005111496210098267, 'time_algorithm_update': 0.03595748066902161, 'imitator_loss': 0.0041885027978569266, 'critic_loss': 1590134431612.928, 'actor_loss': -4997996.7625, 'temp': 27955.977697265625, 'temp_loss': -826.8917716217042, 'mmd_loss': 5611.3440390625, 'alpha': 22026.466796875, 'time_step': 0.04138090848922729, 'td_error': 745273729801.2885, 'value_scale': 4982490.970033529, 'discounted_advantage': -6490924.006086043, 'initial_state': 4873678.5, 'diff_eval': 113271.88282035322} step=143000
2025-12-06 06:53.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_143000.d3


Epoch 144/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.61it/s, imitator_loss=0.00421, critic_loss=1.69e+12, actor_loss=-5.15e+6, temp=2.88e+4, temp_loss=-871, mmd_loss=5.61e+3, alpha=2.2e+4]  


2025-12-06 06:54.39 [info     ] BEAR_20251206050833: epoch=144 step=144000 epoch=144 metrics={'time_sample_batch': 0.005124816656112671, 'time_algorithm_update': 0.03627643132209778, 'imitator_loss': 0.004204164679627866, 'critic_loss': 1687749849448.448, 'actor_loss': -5148849.907, 'temp': 28815.67112109375, 'temp_loss': -871.6877859039307, 'mmd_loss': 5610.903307128906, 'alpha': 22026.466796875, 'time_step': 0.04168990063667297, 'td_error': 792196898164.804, 'value_scale': 5135346.614836547, 'discounted_advantage': -6669854.546840089, 'initial_state': 5025514.5, 'diff_eval': 113283.564717794} step=144000
2025-12-06 06:54.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_144000.d3


Epoch 145/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.75it/s, imitator_loss=0.00415, critic_loss=1.8e+12, actor_loss=-5.31e+6, temp=2.96e+4, temp_loss=-659, mmd_loss=5.6e+3, alpha=2.2e+4] 


2025-12-06 06:55.27 [info     ] BEAR_20251206050833: epoch=145 step=145000 epoch=145 metrics={'time_sample_batch': 0.005293267011642456, 'time_algorithm_update': 0.035821964025497435, 'imitator_loss': 0.0041373704895377155, 'critic_loss': 1802618697678.848, 'actor_loss': -5306030.0665, 'temp': 29583.32069921875, 'temp_loss': -704.3708225402833, 'mmd_loss': 5596.600482421875, 'alpha': 22026.466796875, 'time_step': 0.04142569947242737, 'td_error': 839019523120.7595, 'value_scale': 5292593.677493713, 'discounted_advantage': -6779385.247904997, 'initial_state': 5180372.0, 'diff_eval': 113223.7754694642} step=145000
2025-12-06 06:55.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_145000.d3


Epoch 146/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.88it/s, imitator_loss=0.0041, critic_loss=1.89e+12, actor_loss=-5.46e+6, temp=3.03e+4, temp_loss=-817, mmd_loss=5.61e+3, alpha=2.2e+4]


2025-12-06 06:56.14 [info     ] BEAR_20251206050833: epoch=146 step=146000 epoch=146 metrics={'time_sample_batch': 0.005069109201431274, 'time_algorithm_update': 0.0358525824546814, 'imitator_loss': 0.0040957461448851975, 'critic_loss': 1892841110241.28, 'actor_loss': -5461503.77, 'temp': 30307.880697265624, 'temp_loss': -805.7634423065185, 'mmd_loss': 5606.856235839844, 'alpha': 22026.466796875, 'time_step': 0.04121070432662964, 'td_error': 889659613331.385, 'value_scale': 5449673.624476111, 'discounted_advantage': -6943745.409170392, 'initial_state': 5334580.0, 'diff_eval': 113269.43167327855} step=146000
2025-12-06 06:56.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_146000.d3


Epoch 147/200: 100%|██████████| 1000/1000 [00:43<00:00, 23.14it/s, imitator_loss=0.00426, critic_loss=2.02e+12, actor_loss=-5.62e+6, temp=3.12e+4, temp_loss=-764, mmd_loss=5.6e+3, alpha=2.2e+4] 


2025-12-06 06:57.03 [info     ] BEAR_20251206050833: epoch=147 step=147000 epoch=147 metrics={'time_sample_batch': 0.0050857458114624025, 'time_algorithm_update': 0.037115054607391354, 'imitator_loss': 0.004259970653103664, 'critic_loss': 2026477418577.92, 'actor_loss': -5622187.8025, 'temp': 31174.826017578125, 'temp_loss': -791.2615510864258, 'mmd_loss': 5603.653431152344, 'alpha': 22026.466796875, 'time_step': 0.04250220036506653, 'td_error': 945124522439.8973, 'value_scale': 5607494.849748533, 'discounted_advantage': -7187228.56824014, 'initial_state': 5490907.5, 'diff_eval': 113241.01744189586} step=147000
2025-12-06 06:57.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_147000.d3


Epoch 148/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.54it/s, imitator_loss=0.00411, critic_loss=2.13e+12, actor_loss=-5.78e+6, temp=3.2e+4, temp_loss=-875, mmd_loss=5.61e+3, alpha=2.2e+4]   


2025-12-06 06:57.51 [info     ] BEAR_20251206050833: epoch=148 step=148000 epoch=148 metrics={'time_sample_batch': 0.0050006117820739744, 'time_algorithm_update': 0.03650592803955078, 'imitator_loss': 0.004109619453549385, 'critic_loss': 2136315826929.664, 'actor_loss': -5783102.969, 'temp': 32036.622630859376, 'temp_loss': -878.2434752807617, 'mmd_loss': 5606.222634277344, 'alpha': 22026.466796875, 'time_step': 0.041806759119033816, 'td_error': 1007148509710.4375, 'value_scale': 5771704.910729254, 'discounted_advantage': -7515328.685068646, 'initial_state': 5654855.5, 'diff_eval': 113280.40766203274} step=148000
2025-12-06 06:57.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_148000.d3


Epoch 149/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.60it/s, imitator_loss=0.00419, critic_loss=2.25e+12, actor_loss=-5.95e+6, temp=3.28e+4, temp_loss=-772, mmd_loss=5.6e+3, alpha=2.2e+4]  


2025-12-06 06:58.38 [info     ] BEAR_20251206050833: epoch=149 step=149000 epoch=149 metrics={'time_sample_batch': 0.005147494792938232, 'time_algorithm_update': 0.036239732027053835, 'imitator_loss': 0.004187663644435816, 'critic_loss': 2252100094525.44, 'actor_loss': -5946913.397, 'temp': 32851.2628828125, 'temp_loss': -790.2592702941895, 'mmd_loss': 5599.622756835937, 'alpha': 22026.466796875, 'time_step': 0.04170322823524475, 'td_error': 1058425688673.0436, 'value_scale': 5932514.309723387, 'discounted_advantage': -7592499.026049401, 'initial_state': 5814520.5, 'diff_eval': 113275.57365335623} step=149000
2025-12-06 06:58.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_149000.d3


Epoch 150/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.89it/s, imitator_loss=0.00407, critic_loss=2.39e+12, actor_loss=-6.11e+6, temp=3.37e+4, temp_loss=-883, mmd_loss=5.61e+3, alpha=2.2e+4]   


2025-12-06 06:59.26 [info     ] BEAR_20251206050833: epoch=150 step=150000 epoch=150 metrics={'time_sample_batch': 0.005078430652618408, 'time_algorithm_update': 0.03578915381431579, 'imitator_loss': 0.004067158082849346, 'critic_loss': 2389742883962.88, 'actor_loss': -6109573.5095, 'temp': 33748.49946484375, 'temp_loss': -870.7494832763672, 'mmd_loss': 5606.305738769532, 'alpha': 22026.466796875, 'time_step': 0.04116523003578186, 'td_error': 1121247973927.4683, 'value_scale': 6103044.2080888515, 'discounted_advantage': -7779638.429199092, 'initial_state': 5984736.0, 'diff_eval': 113270.77351146612} step=150000
2025-12-06 06:59.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_150000.d3


Epoch 151/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.44it/s, imitator_loss=0.00404, critic_loss=2.51e+12, actor_loss=-6.27e+6, temp=3.47e+4, temp_loss=-830, mmd_loss=5.6e+3, alpha=2.2e+4]    


2025-12-06 07:00.14 [info     ] BEAR_20251206050833: epoch=151 step=151000 epoch=151 metrics={'time_sample_batch': 0.005180264711380005, 'time_algorithm_update': 0.03647904944419861, 'imitator_loss': 0.00403501262771897, 'critic_loss': 2509607409287.168, 'actor_loss': -6275420.5085, 'temp': 34694.88452734375, 'temp_loss': -849.6234435119629, 'mmd_loss': 5603.82049609375, 'alpha': 22026.466796875, 'time_step': 0.04196146035194397, 'td_error': 1180361918275.935, 'value_scale': 6266446.346186086, 'discounted_advantage': -7950192.063432335, 'initial_state': 6145221.0, 'diff_eval': 113263.7049921872} step=151000
2025-12-06 07:00.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_151000.d3


Epoch 152/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.80it/s, imitator_loss=0.00402, critic_loss=2.65e+12, actor_loss=-6.44e+6, temp=3.54e+4, temp_loss=-926, mmd_loss=5.6e+3, alpha=2.2e+4] 


2025-12-06 07:01.02 [info     ] BEAR_20251206050833: epoch=152 step=152000 epoch=152 metrics={'time_sample_batch': 0.005086097240447998, 'time_algorithm_update': 0.03591185808181763, 'imitator_loss': 0.004023786825826391, 'critic_loss': 2651046756614.144, 'actor_loss': -6443330.4465, 'temp': 35417.55773828125, 'temp_loss': -943.8600442810059, 'mmd_loss': 5602.049702636718, 'alpha': 22026.466796875, 'time_step': 0.04129838943481445, 'td_error': 1248762820416.6902, 'value_scale': 6442595.520117351, 'discounted_advantage': -8101557.994730205, 'initial_state': 6317906.5, 'diff_eval': 113250.9915858586} step=152000
2025-12-06 07:01.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_152000.d3


Epoch 153/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.63it/s, imitator_loss=0.00403, critic_loss=2.8e+12, actor_loss=-6.62e+6, temp=3.65e+4, temp_loss=-1.04e+3, mmd_loss=5.61e+3, alpha=2.2e+4]


2025-12-06 07:01.49 [info     ] BEAR_20251206050833: epoch=153 step=153000 epoch=153 metrics={'time_sample_batch': 0.005130152940750122, 'time_algorithm_update': 0.036176213026046754, 'imitator_loss': 0.004033255533780903, 'critic_loss': 2798123902435.328, 'actor_loss': -6617666.06, 'temp': 36550.570625, 'temp_loss': -1031.0715195159912, 'mmd_loss': 5613.353249511719, 'alpha': 22026.466796875, 'time_step': 0.04162981963157654, 'td_error': 1324574070987.526, 'value_scale': 6620736.920368819, 'discounted_advantage': -8446202.928437306, 'initial_state': 6495603.0, 'diff_eval': 113240.30913752822} step=153000
2025-12-06 07:01.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_153000.d3


Epoch 154/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.89it/s, imitator_loss=0.00405, critic_loss=2.95e+12, actor_loss=-6.79e+6, temp=3.75e+4, temp_loss=-929, mmd_loss=5.61e+3, alpha=2.2e+4]   


2025-12-06 07:02.37 [info     ] BEAR_20251206050833: epoch=154 step=154000 epoch=154 metrics={'time_sample_batch': 0.00501040768623352, 'time_algorithm_update': 0.03589543342590332, 'imitator_loss': 0.004047188419732265, 'critic_loss': 2957556388593.664, 'actor_loss': -6795702.573, 'temp': 37498.1656875, 'temp_loss': -913.3917842712402, 'mmd_loss': 5606.1778247070315, 'alpha': 22026.466796875, 'time_step': 0.04119006276130676, 'td_error': 1400886109978.192, 'value_scale': 6803152.743503772, 'discounted_advantage': -8657534.087043913, 'initial_state': 6674137.5, 'diff_eval': 113282.34683340094} step=154000
2025-12-06 07:02.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_154000.d3


Epoch 155/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.06it/s, imitator_loss=0.00401, critic_loss=3.13e+12, actor_loss=-6.98e+6, temp=3.85e+4, temp_loss=-1.08e+3, mmd_loss=5.61e+3, alpha=2.2e+4]


2025-12-06 07:03.23 [info     ] BEAR_20251206050833: epoch=155 step=155000 epoch=155 metrics={'time_sample_batch': 0.00507340407371521, 'time_algorithm_update': 0.0354916615486145, 'imitator_loss': 0.004006863549584523, 'critic_loss': 3130526374821.888, 'actor_loss': -6980201.2275, 'temp': 38521.3284296875, 'temp_loss': -1066.3220097351075, 'mmd_loss': 5608.622658691406, 'alpha': 22026.466796875, 'time_step': 0.04086690902709961, 'td_error': 1485403836516.314, 'value_scale': 6997030.547569154, 'discounted_advantage': -9015022.47034565, 'initial_state': 6868572.0, 'diff_eval': 113254.2869641671} step=155000
2025-12-06 07:03.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_155000.d3


Epoch 156/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.92it/s, imitator_loss=0.00399, critic_loss=3.31e+12, actor_loss=-7.17e+6, temp=3.95e+4, temp_loss=-1.21e+3, mmd_loss=5.61e+3, alpha=2.2e+4]


2025-12-06 07:04.11 [info     ] BEAR_20251206050833: epoch=156 step=156000 epoch=156 metrics={'time_sample_batch': 0.0050512545108795165, 'time_algorithm_update': 0.035751156568527225, 'imitator_loss': 0.003984989737393334, 'critic_loss': 3313810916769.792, 'actor_loss': -7172714.5925, 'temp': 39505.8514296875, 'temp_loss': -1213.0511128540038, 'mmd_loss': 5611.890348632813, 'alpha': 22026.466796875, 'time_step': 0.041109989404678346, 'td_error': 1580265735554.5017, 'value_scale': 7196319.273889354, 'discounted_advantage': -9364468.587074218, 'initial_state': 7064202.0, 'diff_eval': 113235.84248699946} step=156000
2025-12-06 07:04.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_156000.d3


Epoch 157/200: 100%|██████████| 1000/1000 [00:43<00:00, 22.99it/s, imitator_loss=0.00397, critic_loss=3.51e+12, actor_loss=-7.37e+6, temp=4.08e+4, temp_loss=-785, mmd_loss=5.6e+3, alpha=2.2e+4]    


2025-12-06 07:05.00 [info     ] BEAR_20251206050833: epoch=157 step=157000 epoch=157 metrics={'time_sample_batch': 0.005099897623062134, 'time_algorithm_update': 0.036564409255981443, 'imitator_loss': 0.003979447782621719, 'critic_loss': 3506064881352.704, 'actor_loss': -7373739.582, 'temp': 40838.46517578125, 'temp_loss': -762.3125833435058, 'mmd_loss': 5597.01078515625, 'alpha': 22026.466796875, 'time_step': 0.04195470404624939, 'td_error': 1660920424113.58, 'value_scale': 7392715.104568315, 'discounted_advantage': -9451499.946267094, 'initial_state': 7260827.5, 'diff_eval': 113279.62224301948} step=157000
2025-12-06 07:05.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_157000.d3


Epoch 158/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.02it/s, imitator_loss=0.00396, critic_loss=3.68e+12, actor_loss=-7.57e+6, temp=4.15e+4, temp_loss=-781, mmd_loss=5.6e+3, alpha=2.2e+4]    


2025-12-06 07:05.47 [info     ] BEAR_20251206050833: epoch=158 step=158000 epoch=158 metrics={'time_sample_batch': 0.0050228071212768555, 'time_algorithm_update': 0.03559267568588257, 'imitator_loss': 0.003957614628132432, 'critic_loss': 3681401306349.568, 'actor_loss': -7568046.808, 'temp': 41542.395015625, 'temp_loss': -779.5995739440918, 'mmd_loss': 5596.230814453125, 'alpha': 22026.466796875, 'time_step': 0.04092238402366638, 'td_error': 1755433199522.58, 'value_scale': 7599076.814333613, 'discounted_advantage': -9629593.913816601, 'initial_state': 7465133.5, 'diff_eval': 113251.4731920343} step=158000
2025-12-06 07:05.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_158000.d3


Epoch 159/200: 100%|██████████| 1000/1000 [00:44<00:00, 22.69it/s, imitator_loss=0.00393, critic_loss=3.88e+12, actor_loss=-7.76e+6, temp=4.24e+4, temp_loss=-1.01e+3, mmd_loss=5.6e+3, alpha=2.2e+4]


2025-12-06 07:06.37 [info     ] BEAR_20251206050833: epoch=159 step=159000 epoch=159 metrics={'time_sample_batch': 0.005758938789367676, 'time_algorithm_update': 0.03733148241043091, 'imitator_loss': 0.003939679448609241, 'critic_loss': 3880065185611.776, 'actor_loss': -7765632.1115, 'temp': 42402.8000390625, 'temp_loss': -1006.644329574585, 'mmd_loss': 5604.0798188476565, 'alpha': 22026.466796875, 'time_step': 0.043374290227890015, 'td_error': 1856278953965.6133, 'value_scale': 7797038.79086337, 'discounted_advantage': -10066939.61026609, 'initial_state': 7662905.5, 'diff_eval': 113273.46091092183} step=159000
2025-12-06 07:06.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_159000.d3


Epoch 160/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.40it/s, imitator_loss=0.00398, critic_loss=4.11e+12, actor_loss=-7.97e+6, temp=4.35e+4, temp_loss=-1.01e+3, mmd_loss=5.6e+3, alpha=2.2e+4]


2025-12-06 07:07.25 [info     ] BEAR_20251206050833: epoch=160 step=160000 epoch=160 metrics={'time_sample_batch': 0.005153785705566407, 'time_algorithm_update': 0.03658133029937744, 'imitator_loss': 0.003984693819074891, 'critic_loss': 4109374970396.672, 'actor_loss': -7969500.4805, 'temp': 43540.416328125, 'temp_loss': -964.1413085327148, 'mmd_loss': 5603.986874023438, 'alpha': 22026.466796875, 'time_step': 0.04203931140899658, 'td_error': 1951881544490.7485, 'value_scale': 8011208.872799665, 'discounted_advantage': -10030383.007037727, 'initial_state': 7873770.0, 'diff_eval': 113286.65108387028} step=160000
2025-12-06 07:07.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_160000.d3


Epoch 161/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.85it/s, imitator_loss=0.00389, critic_loss=4.27e+12, actor_loss=-8.17e+6, temp=4.45e+4, temp_loss=-1.2e+3, mmd_loss=5.61e+3, alpha=2.2e+4]


2025-12-06 07:08.12 [info     ] BEAR_20251206050833: epoch=161 step=161000 epoch=161 metrics={'time_sample_batch': 0.005092178821563721, 'time_algorithm_update': 0.0358605899810791, 'imitator_loss': 0.003898726975894533, 'critic_loss': 4268314694778.88, 'actor_loss': -8173990.148, 'temp': 44537.168765625, 'temp_loss': -1198.2106476440429, 'mmd_loss': 5610.973849121094, 'alpha': 22026.466796875, 'time_step': 0.04125122904777527, 'td_error': 2068528096219.645, 'value_scale': 8219382.9763202015, 'discounted_advantage': -10585794.873896273, 'initial_state': 8078707.0, 'diff_eval': 113230.94340210728} step=161000
2025-12-06 07:08.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_161000.d3


Epoch 162/200: 100%|██████████| 1000/1000 [00:43<00:00, 23.22it/s, imitator_loss=0.00392, critic_loss=4.55e+12, actor_loss=-8.37e+6, temp=4.55e+4, temp_loss=-778, mmd_loss=5.6e+3, alpha=2.2e+4]   


2025-12-06 07:09.01 [info     ] BEAR_20251206050833: epoch=162 step=162000 epoch=162 metrics={'time_sample_batch': 0.005163911342620849, 'time_algorithm_update': 0.036900698184967044, 'imitator_loss': 0.003918015158502385, 'critic_loss': 4551373983318.016, 'actor_loss': -8375359.29, 'temp': 45549.739078125, 'temp_loss': -783.300770904541, 'mmd_loss': 5603.1573125, 'alpha': 22026.466796875, 'time_step': 0.04236932516098022, 'td_error': 2179936470453.075, 'value_scale': 8424756.261525566, 'discounted_advantage': -10979715.641267698, 'initial_state': 8282334.5, 'diff_eval': 113248.82224535242} step=162000
2025-12-06 07:09.01 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_162000.d3


Epoch 163/200: 100%|██████████| 1000/1000 [00:43<00:00, 23.24it/s, imitator_loss=0.00389, critic_loss=4.75e+12, actor_loss=-8.58e+6, temp=4.65e+4, temp_loss=-953, mmd_loss=5.6e+3, alpha=2.2e+4]    


2025-12-06 07:09.49 [info     ] BEAR_20251206050833: epoch=163 step=163000 epoch=163 metrics={'time_sample_batch': 0.005301224946975708, 'time_algorithm_update': 0.03668584489822388, 'imitator_loss': 0.003888877093908377, 'critic_loss': 4750195129843.712, 'actor_loss': -8580641.22, 'temp': 46476.90366015625, 'temp_loss': -963.4729570922851, 'mmd_loss': 5601.612193359375, 'alpha': 22026.466796875, 'time_step': 0.04230215811729431, 'td_error': 2279748313700.52, 'value_scale': 8641564.373218777, 'discounted_advantage': -10993003.297095923, 'initial_state': 8498972.0, 'diff_eval': 113252.38513719506} step=163000
2025-12-06 07:09.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_163000.d3


Epoch 164/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.79it/s, imitator_loss=0.00386, critic_loss=4.98e+12, actor_loss=-8.8e+6, temp=4.75e+4, temp_loss=-1.16e+3, mmd_loss=5.6e+3, alpha=2.2e+4] 


2025-12-06 07:10.37 [info     ] BEAR_20251206050833: epoch=164 step=164000 epoch=164 metrics={'time_sample_batch': 0.005049553155899048, 'time_algorithm_update': 0.03599657320976257, 'imitator_loss': 0.0038650277674896643, 'critic_loss': 4982079963529.216, 'actor_loss': -8798944.89, 'temp': 47555.534890625, 'temp_loss': -1168.2180285339355, 'mmd_loss': 5597.201643066406, 'alpha': 22026.466796875, 'time_step': 0.041338423013687134, 'td_error': 2405309855796.043, 'value_scale': 8871618.544635372, 'discounted_advantage': -11209274.124896903, 'initial_state': 8727321.0, 'diff_eval': 113236.55236451884} step=164000
2025-12-06 07:10.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_164000.d3


Epoch 165/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.01it/s, imitator_loss=0.00393, critic_loss=5.25e+12, actor_loss=-9.02e+6, temp=4.89e+4, temp_loss=-1.6e+3, mmd_loss=5.61e+3, alpha=2.2e+4]


2025-12-06 07:11.24 [info     ] BEAR_20251206050833: epoch=165 step=165000 epoch=165 metrics={'time_sample_batch': 0.005002411603927612, 'time_algorithm_update': 0.035668136835098264, 'imitator_loss': 0.003924796807812526, 'critic_loss': 5248300207308.8, 'actor_loss': -9021193.465, 'temp': 48944.77713671875, 'temp_loss': -1591.0995492248535, 'mmd_loss': 5612.627780273438, 'alpha': 22026.466796875, 'time_step': 0.04096815323829651, 'td_error': 2540522147767.641, 'value_scale': 9102260.786672255, 'discounted_advantage': -11623015.639652641, 'initial_state': 8955392.0, 'diff_eval': 113269.48077558851} step=165000
2025-12-06 07:11.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_165000.d3


Epoch 166/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.55it/s, imitator_loss=0.00386, critic_loss=5.56e+12, actor_loss=-9.25e+6, temp=5.02e+4, temp_loss=-815, mmd_loss=5.6e+3, alpha=2.2e+4] 


2025-12-06 07:12.12 [info     ] BEAR_20251206050833: epoch=166 step=166000 epoch=166 metrics={'time_sample_batch': 0.005176884174346924, 'time_algorithm_update': 0.03629325318336487, 'imitator_loss': 0.0038597382090520115, 'critic_loss': 5565310027956.224, 'actor_loss': -9250970.428, 'temp': 50225.538015625, 'temp_loss': -795.5068285522461, 'mmd_loss': 5601.185117675781, 'alpha': 22026.466796875, 'time_step': 0.04177131724357605, 'td_error': 2682061873318.635, 'value_scale': 9333710.23281643, 'discounted_advantage': -11978294.908683665, 'initial_state': 9188707.0, 'diff_eval': 113291.3757192479} step=166000
2025-12-06 07:12.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_166000.d3


Epoch 167/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.74it/s, imitator_loss=0.00386, critic_loss=5.87e+12, actor_loss=-9.48e+6, temp=5.12e+4, temp_loss=-878, mmd_loss=5.6e+3, alpha=2.2e+4]    


2025-12-06 07:12.59 [info     ] BEAR_20251206050833: epoch=167 step=167000 epoch=167 metrics={'time_sample_batch': 0.0051181774139404295, 'time_algorithm_update': 0.03601490712165833, 'imitator_loss': 0.0038622040952323005, 'critic_loss': 5876317595631.616, 'actor_loss': -9477615.88, 'temp': 51214.78071875, 'temp_loss': -854.9600097961426, 'mmd_loss': 5597.754485351563, 'alpha': 22026.466796875, 'time_step': 0.04143191170692444, 'td_error': 2817694898667.437, 'value_scale': 9557679.052808046, 'discounted_advantage': -12419884.324668525, 'initial_state': 9409411.0, 'diff_eval': 113248.75174851909} step=167000
2025-12-06 07:12.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_167000.d3


Epoch 168/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.82it/s, imitator_loss=0.0038, critic_loss=6.15e+12, actor_loss=-9.7e+6, temp=5.23e+4, temp_loss=-1.17e+3, mmd_loss=5.61e+3, alpha=2.2e+4] 


2025-12-06 07:13.47 [info     ] BEAR_20251206050833: epoch=168 step=168000 epoch=168 metrics={'time_sample_batch': 0.005077100038528443, 'time_algorithm_update': 0.03592559146881104, 'imitator_loss': 0.0037988376745488493, 'critic_loss': 6152988897574.912, 'actor_loss': -9705514.914, 'temp': 52263.23206640625, 'temp_loss': -1151.8925542907714, 'mmd_loss': 5613.357580566406, 'alpha': 22026.466796875, 'time_step': 0.04130940556526184, 'td_error': 2948683274307.054, 'value_scale': 9791967.567476949, 'discounted_advantage': -12495913.391028786, 'initial_state': 9637471.0, 'diff_eval': 113249.21755216588} step=168000
2025-12-06 07:13.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_168000.d3


Epoch 169/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.38it/s, imitator_loss=0.00377, critic_loss=6.44e+12, actor_loss=-9.93e+6, temp=5.33e+4, temp_loss=-1.03e+3, mmd_loss=5.6e+3, alpha=2.2e+4]


2025-12-06 07:14.35 [info     ] BEAR_20251206050833: epoch=169 step=169000 epoch=169 metrics={'time_sample_batch': 0.0050395820140838625, 'time_algorithm_update': 0.03610283851623535, 'imitator_loss': 0.003768155392142944, 'critic_loss': 6442901647654.912, 'actor_loss': -9933693.654, 'temp': 53331.403703125, 'temp_loss': -1017.2902489318848, 'mmd_loss': 5597.018146972656, 'alpha': 22026.466796875, 'time_step': 0.041475773334503176, 'td_error': 3108919006399.2856, 'value_scale': 10030609.564124057, 'discounted_advantage': -13038606.448138429, 'initial_state': 9876363.0, 'diff_eval': 113275.39931323682} step=169000
2025-12-06 07:14.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_169000.d3


Epoch 170/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.12it/s, imitator_loss=0.00385, critic_loss=6.75e+12, actor_loss=-1.02e+7, temp=5.44e+4, temp_loss=-1.19e+3, mmd_loss=5.6e+3, alpha=2.2e+4]


2025-12-06 07:15.22 [info     ] BEAR_20251206050833: epoch=170 step=170000 epoch=170 metrics={'time_sample_batch': 0.004959871530532837, 'time_algorithm_update': 0.035511566638946536, 'imitator_loss': 0.003844778055557981, 'critic_loss': 6753876647084.032, 'actor_loss': -10169803.219, 'temp': 54442.68184375, 'temp_loss': -1209.374079864502, 'mmd_loss': 5600.668780761719, 'alpha': 22026.466796875, 'time_step': 0.040776427507400514, 'td_error': 3260615043190.214, 'value_scale': 10274899.054065382, 'discounted_advantage': -13268871.18756358, 'initial_state': 10118798.0, 'diff_eval': 113243.6000153125} step=170000
2025-12-06 07:15.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_170000.d3


Epoch 171/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.64it/s, imitator_loss=0.0038, critic_loss=7.06e+12, actor_loss=-1.04e+7, temp=5.56e+4, temp_loss=-1.05e+3, mmd_loss=5.59e+3, alpha=2.2e+4]


2025-12-06 07:16.09 [info     ] BEAR_20251206050833: epoch=171 step=171000 epoch=171 metrics={'time_sample_batch': 0.005116285085678101, 'time_algorithm_update': 0.036144405603408813, 'imitator_loss': 0.0038064850483788175, 'critic_loss': 7060895811764.224, 'actor_loss': -10411313.718, 'temp': 55625.0299921875, 'temp_loss': -1064.6384062194825, 'mmd_loss': 5594.812829101563, 'alpha': 22026.466796875, 'time_step': 0.04157989931106568, 'td_error': 3399440271984.2324, 'value_scale': 10519330.376362111, 'discounted_advantage': -13279230.468250558, 'initial_state': 10362396.0, 'diff_eval': 113276.27436249214} step=171000
2025-12-06 07:16.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_171000.d3


Epoch 172/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.95it/s, imitator_loss=0.00384, critic_loss=7.37e+12, actor_loss=-1.07e+7, temp=5.68e+4, temp_loss=-1.14e+3, mmd_loss=5.6e+3, alpha=2.2e+4]


2025-12-06 07:16.56 [info     ] BEAR_20251206050833: epoch=172 step=172000 epoch=172 metrics={'time_sample_batch': 0.004989632844924927, 'time_algorithm_update': 0.03577715349197388, 'imitator_loss': 0.0038415427908767017, 'critic_loss': 7371355351416.832, 'actor_loss': -10654885.384, 'temp': 56820.27587890625, 'temp_loss': -1121.3829073791503, 'mmd_loss': 5602.794324707032, 'alpha': 22026.466796875, 'time_step': 0.04106682586669922, 'td_error': 3555435754031.362, 'value_scale': 10763897.261106454, 'discounted_advantage': -13434620.895591065, 'initial_state': 10607686.0, 'diff_eval': 113270.95790075796} step=172000
2025-12-06 07:16.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_172000.d3


Epoch 173/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.71it/s, imitator_loss=0.00377, critic_loss=7.8e+12, actor_loss=-1.09e+7, temp=5.82e+4, temp_loss=-1.31e+3, mmd_loss=5.6e+3, alpha=2.2e+4] 


2025-12-06 07:17.44 [info     ] BEAR_20251206050833: epoch=173 step=173000 epoch=173 metrics={'time_sample_batch': 0.005040324449539184, 'time_algorithm_update': 0.036127111673355104, 'imitator_loss': 0.0037674881191924214, 'critic_loss': 7807472868261.888, 'actor_loss': -10894354.83, 'temp': 58224.82153125, 'temp_loss': -1267.1290164794923, 'mmd_loss': 5599.939658691406, 'alpha': 22026.466796875, 'time_step': 0.041477454662323, 'td_error': 3741519527049.149, 'value_scale': 11015878.095557418, 'discounted_advantage': -14032172.75747753, 'initial_state': 10855836.0, 'diff_eval': 113261.36483199318} step=173000
2025-12-06 07:17.44 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_173000.d3


Epoch 174/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.92it/s, imitator_loss=0.00378, critic_loss=8.13e+12, actor_loss=-1.11e+7, temp=5.93e+4, temp_loss=-1.32e+3, mmd_loss=5.6e+3, alpha=2.2e+4]


2025-12-06 07:18.31 [info     ] BEAR_20251206050833: epoch=174 step=174000 epoch=174 metrics={'time_sample_batch': 0.005023585557937622, 'time_algorithm_update': 0.03580944633483887, 'imitator_loss': 0.0037845661491155624, 'critic_loss': 8128996811735.04, 'actor_loss': -11139240.265, 'temp': 59335.7482421875, 'temp_loss': -1359.8669478759766, 'mmd_loss': 5603.6227421875, 'alpha': 22026.466796875, 'time_step': 0.04112639355659485, 'td_error': 3912722522300.994, 'value_scale': 11260766.852053646, 'discounted_advantage': -14349220.270001482, 'initial_state': 11094851.0, 'diff_eval': 113232.37360766428} step=174000
2025-12-06 07:18.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_174000.d3


Epoch 175/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.50it/s, imitator_loss=0.00376, critic_loss=8.46e+12, actor_loss=-1.14e+7, temp=6.05e+4, temp_loss=-961, mmd_loss=5.59e+3, alpha=2.2e+4]   


2025-12-06 07:19.19 [info     ] BEAR_20251206050833: epoch=175 step=175000 epoch=175 metrics={'time_sample_batch': 0.005068174362182617, 'time_algorithm_update': 0.03645417547225952, 'imitator_loss': 0.003759516137535684, 'critic_loss': 8463013326094.336, 'actor_loss': -11383683.512, 'temp': 60528.69087109375, 'temp_loss': -930.6283666992188, 'mmd_loss': 5591.246760253906, 'alpha': 22026.466796875, 'time_step': 0.041833270788192746, 'td_error': 4082905050840.842, 'value_scale': 11517362.649203688, 'discounted_advantage': -14540371.526856191, 'initial_state': 11352486.0, 'diff_eval': 113257.0862212393} step=175000
2025-12-06 07:19.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_175000.d3


Epoch 176/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.91it/s, imitator_loss=0.0037, critic_loss=8.86e+12, actor_loss=-1.16e+7, temp=6.15e+4, temp_loss=-1.12e+3, mmd_loss=5.6e+3, alpha=2.2e+4]


2025-12-06 07:20.07 [info     ] BEAR_20251206050833: epoch=176 step=176000 epoch=176 metrics={'time_sample_batch': 0.005016395568847656, 'time_algorithm_update': 0.035806849479675294, 'imitator_loss': 0.0036950999160762876, 'critic_loss': 8861698824339.455, 'actor_loss': -11636421.318, 'temp': 61546.31115234375, 'temp_loss': -1154.1372614135742, 'mmd_loss': 5603.248957519531, 'alpha': 22026.466796875, 'time_step': 0.0411263473033905, 'td_error': 4277894923711.881, 'value_scale': 11777962.581307627, 'discounted_advantage': -14968631.809002334, 'initial_state': 11606957.0, 'diff_eval': 113241.94387026945} step=176000
2025-12-06 07:20.07 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_176000.d3


Epoch 177/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.55it/s, imitator_loss=0.00374, critic_loss=9.28e+12, actor_loss=-1.19e+7, temp=6.3e+4, temp_loss=-1.59e+3, mmd_loss=5.6e+3, alpha=2.2e+4] 


2025-12-06 07:20.54 [info     ] BEAR_20251206050833: epoch=177 step=177000 epoch=177 metrics={'time_sample_batch': 0.005146844387054443, 'time_algorithm_update': 0.03623347878456116, 'imitator_loss': 0.003742514597484842, 'critic_loss': 9274708067876.863, 'actor_loss': -11896896.788, 'temp': 63018.24652734375, 'temp_loss': -1550.197652770996, 'mmd_loss': 5600.5869365234375, 'alpha': 22026.466796875, 'time_step': 0.041697014331817626, 'td_error': 4488703937054.966, 'value_scale': 12044321.46856664, 'discounted_advantage': -15517526.819051012, 'initial_state': 11875336.0, 'diff_eval': 113252.05840175529} step=177000
2025-12-06 07:20.55 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_177000.d3


Epoch 178/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.84it/s, imitator_loss=0.00367, critic_loss=9.64e+12, actor_loss=-1.22e+7, temp=6.47e+4, temp_loss=-1.41e+3, mmd_loss=5.6e+3, alpha=2.2e+4]


2025-12-06 07:21.42 [info     ] BEAR_20251206050833: epoch=178 step=178000 epoch=178 metrics={'time_sample_batch': 0.005076406955718994, 'time_algorithm_update': 0.0359146568775177, 'imitator_loss': 0.0036720284533221276, 'critic_loss': 9638988254019.584, 'actor_loss': -12174306.797, 'temp': 64669.2038515625, 'temp_loss': -1394.8426315917968, 'mmd_loss': 5598.0571513671875, 'alpha': 22026.466796875, 'time_step': 0.041295485258102414, 'td_error': 4715473686607.884, 'value_scale': 12321046.391869238, 'discounted_advantage': -16041478.23261261, 'initial_state': 12150095.0, 'diff_eval': 113266.20353550364} step=178000
2025-12-06 07:21.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_178000.d3


Epoch 179/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.01it/s, imitator_loss=0.00371, critic_loss=1.02e+13, actor_loss=-1.25e+7, temp=6.61e+4, temp_loss=-1.21e+3, mmd_loss=5.6e+3, alpha=2.2e+4]


2025-12-06 07:22.29 [info     ] BEAR_20251206050833: epoch=179 step=179000 epoch=179 metrics={'time_sample_batch': 0.005039743423461914, 'time_algorithm_update': 0.035606919288635255, 'imitator_loss': 0.0037149093097541483, 'critic_loss': 10225067346624.512, 'actor_loss': -12451837.334, 'temp': 66081.32873046875, 'temp_loss': -1196.9286627197266, 'mmd_loss': 5599.812745605469, 'alpha': 22026.466796875, 'time_step': 0.04094679117202759, 'td_error': 4897699963265.233, 'value_scale': 12594271.22296731, 'discounted_advantage': -16060771.323263388, 'initial_state': 12423030.0, 'diff_eval': 113275.33636391473} step=179000
2025-12-06 07:22.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_179000.d3


Epoch 180/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.03it/s, imitator_loss=0.00372, critic_loss=1.06e+13, actor_loss=-1.27e+7, temp=6.71e+4, temp_loss=-1.04e+3, mmd_loss=5.59e+3, alpha=2.2e+4]


2025-12-06 07:23.16 [info     ] BEAR_20251206050833: epoch=180 step=180000 epoch=180 metrics={'time_sample_batch': 0.0050040769577026365, 'time_algorithm_update': 0.03559516382217407, 'imitator_loss': 0.0037157496544532476, 'critic_loss': 10587682297085.951, 'actor_loss': -12726135.261, 'temp': 67093.1515546875, 'temp_loss': -1034.9764497680665, 'mmd_loss': 5590.712909667969, 'alpha': 22026.466796875, 'time_step': 0.04091202902793884, 'td_error': 5109257631376.737, 'value_scale': 12867292.57082984, 'discounted_advantage': -16356716.218033694, 'initial_state': 12693486.0, 'diff_eval': 113249.49719731986} step=180000
2025-12-06 07:23.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_180000.d3


Epoch 181/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.90it/s, imitator_loss=0.00369, critic_loss=1.11e+13, actor_loss=-1.3e+7, temp=6.83e+4, temp_loss=-1.45e+3, mmd_loss=5.6e+3, alpha=2.2e+4] 


2025-12-06 07:24.03 [info     ] BEAR_20251206050833: epoch=181 step=181000 epoch=181 metrics={'time_sample_batch': 0.004994434356689453, 'time_algorithm_update': 0.03585510587692261, 'imitator_loss': 0.003681883257231675, 'critic_loss': 11080347510374.4, 'actor_loss': -12995657.144, 'temp': 68330.91371875, 'temp_loss': -1442.5182154541017, 'mmd_loss': 5596.985953125, 'alpha': 22026.466796875, 'time_step': 0.04115454769134522, 'td_error': 5323690644633.3545, 'value_scale': 13129369.815590948, 'discounted_advantage': -16711209.254393118, 'initial_state': 12954982.0, 'diff_eval': 113237.61862653299} step=181000
2025-12-06 07:24.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_181000.d3


Epoch 182/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.69it/s, imitator_loss=0.00366, critic_loss=1.15e+13, actor_loss=-1.33e+7, temp=6.95e+4, temp_loss=-663, mmd_loss=5.59e+3, alpha=2.2e+4]   


2025-12-06 07:24.51 [info     ] BEAR_20251206050833: epoch=182 step=182000 epoch=182 metrics={'time_sample_batch': 0.005143021583557129, 'time_algorithm_update': 0.03610312342643738, 'imitator_loss': 0.0036602454292587938, 'critic_loss': 11527642695073.793, 'actor_loss': -13254213.916, 'temp': 69495.5480546875, 'temp_loss': -679.0747949829101, 'mmd_loss': 5589.3511962890625, 'alpha': 22026.466796875, 'time_step': 0.041533618927001956, 'td_error': 5501563057262.77, 'value_scale': 13385983.794216262, 'discounted_advantage': -16684711.744651062, 'initial_state': 13209021.0, 'diff_eval': 113259.79084857131} step=182000
2025-12-06 07:24.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_182000.d3


Epoch 183/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.33it/s, imitator_loss=0.00371, critic_loss=1.19e+13, actor_loss=-1.35e+7, temp=7e+4, temp_loss=-184, mmd_loss=5.58e+3, alpha=2.2e+4]     


2025-12-06 07:25.39 [info     ] BEAR_20251206050833: epoch=183 step=183000 epoch=183 metrics={'time_sample_batch': 0.0052757587432861325, 'time_algorithm_update': 0.036573943853378296, 'imitator_loss': 0.003708852207637392, 'critic_loss': 11927092315815.936, 'actor_loss': -13505465.345, 'temp': 70050.318046875, 'temp_loss': -201.75439169311522, 'mmd_loss': 5581.888657714844, 'alpha': 22026.466796875, 'time_step': 0.04216070556640625, 'td_error': 5711311872331.046, 'value_scale': 13638890.541072926, 'discounted_advantage': -17041127.623213883, 'initial_state': 13452882.0, 'diff_eval': 113292.43240729922} step=183000
2025-12-06 07:25.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_183000.d3


Epoch 184/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.78it/s, imitator_loss=0.00372, critic_loss=1.24e+13, actor_loss=-1.38e+7, temp=7.07e+4, temp_loss=-1.29e+3, mmd_loss=5.6e+3, alpha=2.2e+4]


2025-12-06 07:26.27 [info     ] BEAR_20251206050833: epoch=184 step=184000 epoch=184 metrics={'time_sample_batch': 0.005092475891113281, 'time_algorithm_update': 0.03596538829803467, 'imitator_loss': 0.0037189175923122092, 'critic_loss': 12406022687162.367, 'actor_loss': -13756970.097, 'temp': 70717.4001328125, 'temp_loss': -1280.7880242004394, 'mmd_loss': 5595.899657714844, 'alpha': 22026.466796875, 'time_step': 0.041370751142501834, 'td_error': 5935686797594.569, 'value_scale': 13892061.422883486, 'discounted_advantage': -17506752.171532247, 'initial_state': 13696581.0, 'diff_eval': 113264.51213532902} step=184000
2025-12-06 07:26.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_184000.d3


Epoch 185/200: 100%|██████████| 1000/1000 [00:43<00:00, 23.04it/s, imitator_loss=0.00373, critic_loss=1.28e+13, actor_loss=-1.4e+7, temp=7.22e+4, temp_loss=-1.24e+3, mmd_loss=5.59e+3, alpha=2.2e+4]


2025-12-06 07:27.16 [info     ] BEAR_20251206050833: epoch=185 step=185000 epoch=185 metrics={'time_sample_batch': 0.005109489679336548, 'time_algorithm_update': 0.037218387365341186, 'imitator_loss': 0.00372063925571274, 'critic_loss': 12850031411331.072, 'actor_loss': -14015281.752, 'temp': 72215.7925390625, 'temp_loss': -1226.388480041504, 'mmd_loss': 5594.371031738281, 'alpha': 22026.466796875, 'time_step': 0.04265072250366211, 'td_error': 6150284368242.133, 'value_scale': 14143103.539815592, 'discounted_advantage': -17837425.815286595, 'initial_state': 13941480.0, 'diff_eval': 113222.29891387095} step=185000
2025-12-06 07:27.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_185000.d3


Epoch 186/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.83it/s, imitator_loss=0.00359, critic_loss=1.33e+13, actor_loss=-1.43e+7, temp=7.35e+4, temp_loss=-1.48e+3, mmd_loss=5.59e+3, alpha=2.2e+4]


2025-12-06 07:28.03 [info     ] BEAR_20251206050833: epoch=186 step=186000 epoch=186 metrics={'time_sample_batch': 0.005006093978881836, 'time_algorithm_update': 0.035980960130691526, 'imitator_loss': 0.0035935247527668253, 'critic_loss': 13299935802818.56, 'actor_loss': -14288603.782, 'temp': 73545.1201015625, 'temp_loss': -1485.7505939941407, 'mmd_loss': 5594.765455566407, 'alpha': 22026.466796875, 'time_step': 0.04127388644218445, 'td_error': 6401245924331.988, 'value_scale': 14430530.835289188, 'discounted_advantage': -18257966.643450867, 'initial_state': 14226998.0, 'diff_eval': 113278.58514848119} step=186000
2025-12-06 07:28.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_186000.d3


Epoch 187/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.10it/s, imitator_loss=0.00369, critic_loss=1.39e+13, actor_loss=-1.46e+7, temp=7.52e+4, temp_loss=-2.05e+3, mmd_loss=5.6e+3, alpha=2.2e+4]


2025-12-06 07:28.50 [info     ] BEAR_20251206050833: epoch=187 step=187000 epoch=187 metrics={'time_sample_batch': 0.004980873346328735, 'time_algorithm_update': 0.03547450017929077, 'imitator_loss': 0.003689963382668793, 'critic_loss': 13920170767548.416, 'actor_loss': -14583419.687, 'temp': 75241.8410625, 'temp_loss': -2052.052070373535, 'mmd_loss': 5602.213665039062, 'alpha': 22026.466796875, 'time_step': 0.040770020246505737, 'td_error': 6656973207275.817, 'value_scale': 14726080.134115675, 'discounted_advantage': -18401360.397664882, 'initial_state': 14519872.0, 'diff_eval': 113220.27151678553} step=187000
2025-12-06 07:28.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_187000.d3


Epoch 188/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.10it/s, imitator_loss=0.00359, critic_loss=1.45e+13, actor_loss=-1.49e+7, temp=7.73e+4, temp_loss=-2.16e+3, mmd_loss=5.61e+3, alpha=2.2e+4]


2025-12-06 07:29.37 [info     ] BEAR_20251206050833: epoch=188 step=188000 epoch=188 metrics={'time_sample_batch': 0.005007390260696411, 'time_algorithm_update': 0.0354779052734375, 'imitator_loss': 0.0035886565933469683, 'critic_loss': 14511950647001.088, 'actor_loss': -14889753.68, 'temp': 77358.2924375, 'temp_loss': -2114.2325536499025, 'mmd_loss': 5605.497022460938, 'alpha': 22026.466796875, 'time_step': 0.040795485496521, 'td_error': 6958146257339.981, 'value_scale': 15040999.382229673, 'discounted_advantage': -18949809.477112815, 'initial_state': 14837475.0, 'diff_eval': 113258.72498169969} step=188000
2025-12-06 07:29.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_188000.d3


Epoch 189/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.66it/s, imitator_loss=0.00361, critic_loss=1.52e+13, actor_loss=-1.52e+7, temp=7.92e+4, temp_loss=-1.38e+3, mmd_loss=5.59e+3, alpha=2.2e+4]


2025-12-06 07:30.25 [info     ] BEAR_20251206050833: epoch=189 step=189000 epoch=189 metrics={'time_sample_batch': 0.005072477102279663, 'time_algorithm_update': 0.03623411965370178, 'imitator_loss': 0.0036121005767490713, 'critic_loss': 15198476487360.512, 'actor_loss': -15210186.462, 'temp': 79193.5062109375, 'temp_loss': -1350.044760925293, 'mmd_loss': 5591.780366210937, 'alpha': 22026.466796875, 'time_step': 0.0415969250202179, 'td_error': 7233118216485.438, 'value_scale': 15345841.03394803, 'discounted_advantage': -19227988.059978135, 'initial_state': 15141290.0, 'diff_eval': 113260.89433930645} step=189000
2025-12-06 07:30.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_189000.d3


Epoch 190/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.15it/s, imitator_loss=0.00366, critic_loss=1.59e+13, actor_loss=-1.55e+7, temp=8.09e+4, temp_loss=-1.61e+3, mmd_loss=5.6e+3, alpha=2.2e+4]


2025-12-06 07:31.12 [info     ] BEAR_20251206050833: epoch=190 step=190000 epoch=190 metrics={'time_sample_batch': 0.005012158155441284, 'time_algorithm_update': 0.03538865637779236, 'imitator_loss': 0.003654007082222961, 'critic_loss': 15899178132570.111, 'actor_loss': -15533644.291, 'temp': 80880.8392421875, 'temp_loss': -1656.4103923034668, 'mmd_loss': 5598.472601074219, 'alpha': 22026.466796875, 'time_step': 0.04071503233909607, 'td_error': 7567679863732.063, 'value_scale': 15657498.795892708, 'discounted_advantage': -20022949.728535008, 'initial_state': 15447443.0, 'diff_eval': 113234.35391538481} step=190000
2025-12-06 07:31.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_190000.d3


Epoch 191/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.75it/s, imitator_loss=0.00364, critic_loss=1.64e+13, actor_loss=-1.59e+7, temp=8.25e+4, temp_loss=-1.58e+3, mmd_loss=5.6e+3, alpha=2.2e+4]


2025-12-06 07:31.59 [info     ] BEAR_20251206050833: epoch=191 step=191000 epoch=191 metrics={'time_sample_batch': 0.005039647817611694, 'time_algorithm_update': 0.03605484437942505, 'imitator_loss': 0.0036357494222465903, 'critic_loss': 16386858941415.424, 'actor_loss': -15860575.614, 'temp': 82523.712125, 'temp_loss': -1595.0746876220703, 'mmd_loss': 5595.89240625, 'alpha': 22026.466796875, 'time_step': 0.041406458377838136, 'td_error': 7841897222624.592, 'value_scale': 15971131.750628667, 'discounted_advantage': -20171255.5870839, 'initial_state': 15767583.0, 'diff_eval': 113208.71077496128} step=191000
2025-12-06 07:32.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_191000.d3


Epoch 192/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.00it/s, imitator_loss=0.00355, critic_loss=1.71e+13, actor_loss=-1.62e+7, temp=8.41e+4, temp_loss=-1.16e+3, mmd_loss=5.6e+3, alpha=2.2e+4]


2025-12-06 07:32.46 [info     ] BEAR_20251206050833: epoch=192 step=192000 epoch=192 metrics={'time_sample_batch': 0.004994287967681885, 'time_algorithm_update': 0.03566641020774841, 'imitator_loss': 0.003548616724321619, 'critic_loss': 17079155884556.287, 'actor_loss': -16183663.82, 'temp': 84088.32065625, 'temp_loss': -1216.1186018371582, 'mmd_loss': 5597.373302734375, 'alpha': 22026.466796875, 'time_step': 0.04096774172782898, 'td_error': 8140102841977.44, 'value_scale': 16285350.064124057, 'discounted_advantage': -20405446.15877714, 'initial_state': 16079360.0, 'diff_eval': 113204.0809663707} step=192000
2025-12-06 07:32.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_192000.d3


Epoch 193/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.61it/s, imitator_loss=0.00359, critic_loss=1.79e+13, actor_loss=-1.65e+7, temp=8.53e+4, temp_loss=-1.14e+3, mmd_loss=5.59e+3, alpha=2.2e+4]


2025-12-06 07:33.34 [info     ] BEAR_20251206050833: epoch=193 step=193000 epoch=193 metrics={'time_sample_batch': 0.005143286466598511, 'time_algorithm_update': 0.03621588325500488, 'imitator_loss': 0.003588423933950253, 'critic_loss': 17873649195286.527, 'actor_loss': -16505171.936, 'temp': 85295.225328125, 'temp_loss': -1166.1952077026367, 'mmd_loss': 5585.40169140625, 'alpha': 22026.466796875, 'time_step': 0.04166495633125305, 'td_error': 8436689498688.107, 'value_scale': 16597678.50838223, 'discounted_advantage': -20566921.409862667, 'initial_state': 16396191.0, 'diff_eval': 113273.51789350742} step=193000
2025-12-06 07:33.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_193000.d3


Epoch 194/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.86it/s, imitator_loss=0.00351, critic_loss=1.86e+13, actor_loss=-1.68e+7, temp=8.66e+4, temp_loss=-1.32e+3, mmd_loss=5.59e+3, alpha=2.2e+4]


2025-12-06 07:34.21 [info     ] BEAR_20251206050833: epoch=194 step=194000 epoch=194 metrics={'time_sample_batch': 0.0050042829513549806, 'time_algorithm_update': 0.03592634201049805, 'imitator_loss': 0.003511687566060573, 'critic_loss': 18654256496115.71, 'actor_loss': -16826711.492, 'temp': 86559.786234375, 'temp_loss': -1345.6557685852051, 'mmd_loss': 5586.521411621094, 'alpha': 22026.466796875, 'time_step': 0.04123598837852478, 'td_error': 8762105822523.747, 'value_scale': 16893884.89564124, 'discounted_advantage': -21247951.55744886, 'initial_state': 16687657.0, 'diff_eval': 113270.15590812895} step=194000
2025-12-06 07:34.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_194000.d3


Epoch 195/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.62it/s, imitator_loss=0.00354, critic_loss=1.94e+13, actor_loss=-1.71e+7, temp=8.8e+4, temp_loss=-1.05e+3, mmd_loss=5.59e+3, alpha=2.2e+4]


2025-12-06 07:35.09 [info     ] BEAR_20251206050833: epoch=195 step=195000 epoch=195 metrics={'time_sample_batch': 0.005035418510437012, 'time_algorithm_update': 0.03631494522094727, 'imitator_loss': 0.0035395693372702224, 'critic_loss': 19343420784902.145, 'actor_loss': -17149851.115, 'temp': 88005.792859375, 'temp_loss': -982.2502014770508, 'mmd_loss': 5585.979122558594, 'alpha': 22026.466796875, 'time_step': 0.041660447835922244, 'td_error': 9070861667764.908, 'value_scale': 17206324.588432524, 'discounted_advantage': -21520944.413527995, 'initial_state': 16999890.0, 'diff_eval': 113229.18028383343} step=195000
2025-12-06 07:35.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_195000.d3


Epoch 196/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.82it/s, imitator_loss=0.00356, critic_loss=2.01e+13, actor_loss=-1.75e+7, temp=8.9e+4, temp_loss=-1.35e+3, mmd_loss=5.58e+3, alpha=2.2e+4]


2025-12-06 07:35.57 [info     ] BEAR_20251206050833: epoch=196 step=196000 epoch=196 metrics={'time_sample_batch': 0.005042786359786987, 'time_algorithm_update': 0.035944143295288083, 'imitator_loss': 0.003557028467184864, 'critic_loss': 20071923797983.23, 'actor_loss': -17478039.016, 'temp': 89010.6055625, 'temp_loss': -1322.249052307129, 'mmd_loss': 5583.07498046875, 'alpha': 22026.466796875, 'time_step': 0.0412831494808197, 'td_error': 9385504180670.527, 'value_scale': 17509397.981978208, 'discounted_advantage': -21929863.051104233, 'initial_state': 17296578.0, 'diff_eval': 113196.28618208061} step=196000
2025-12-06 07:35.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_196000.d3


Epoch 197/200: 100%|██████████| 1000/1000 [00:43<00:00, 23.16it/s, imitator_loss=0.0035, critic_loss=2.1e+13, actor_loss=-1.78e+7, temp=9.03e+4, temp_loss=-892, mmd_loss=5.58e+3, alpha=2.2e+4]     


2025-12-06 07:36.45 [info     ] BEAR_20251206050833: epoch=197 step=197000 epoch=197 metrics={'time_sample_batch': 0.005230857849121094, 'time_algorithm_update': 0.03686068844795227, 'imitator_loss': 0.003498615185613744, 'critic_loss': 21030240706887.68, 'actor_loss': -17800277.318, 'temp': 90273.8745625, 'temp_loss': -853.1833985595703, 'mmd_loss': 5578.149729003906, 'alpha': 22026.466796875, 'time_step': 0.04242697954177856, 'td_error': 9721344314763.059, 'value_scale': 17821536.285834033, 'discounted_advantage': -22349027.679420613, 'initial_state': 17604894.0, 'diff_eval': 113263.55564528638} step=197000
2025-12-06 07:36.46 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_197000.d3


Epoch 198/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.65it/s, imitator_loss=0.00351, critic_loss=2.15e+13, actor_loss=-1.81e+7, temp=9.14e+4, temp_loss=-1.12e+3, mmd_loss=5.59e+3, alpha=2.2e+4]


2025-12-06 07:37.33 [info     ] BEAR_20251206050833: epoch=198 step=198000 epoch=198 metrics={'time_sample_batch': 0.005114611148834229, 'time_algorithm_update': 0.036186105489730835, 'imitator_loss': 0.003514326015487313, 'critic_loss': 21504941992443.902, 'actor_loss': -18116963.372, 'temp': 91357.543546875, 'temp_loss': -1055.093674987793, 'mmd_loss': 5584.82773046875, 'alpha': 22026.466796875, 'time_step': 0.041610430479049686, 'td_error': 10009945638361.496, 'value_scale': 18110792.22045264, 'discounted_advantage': -22526722.351617314, 'initial_state': 17890514.0, 'diff_eval': 113277.56636002597} step=198000
2025-12-06 07:37.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_198000.d3


Epoch 199/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.05it/s, imitator_loss=0.00353, critic_loss=2.22e+13, actor_loss=-1.84e+7, temp=9.19e+4, temp_loss=-318, mmd_loss=5.57e+3, alpha=2.2e+4]  


2025-12-06 07:38.20 [info     ] BEAR_20251206050833: epoch=199 step=199000 epoch=199 metrics={'time_sample_batch': 0.005049972057342529, 'time_algorithm_update': 0.035526376724243164, 'imitator_loss': 0.0035323972491314633, 'critic_loss': 22239607239213.055, 'actor_loss': -18417498.504, 'temp': 91877.400921875, 'temp_loss': -312.95253479003907, 'mmd_loss': 5574.217401367187, 'alpha': 22026.466796875, 'time_step': 0.04091716837882996, 'td_error': 10283252705765.965, 'value_scale': 18387187.139564123, 'discounted_advantage': -22651489.1881265, 'initial_state': 18155190.0, 'diff_eval': 113265.33673284999} step=199000
2025-12-06 07:38.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_199000.d3


Epoch 200/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.16it/s, imitator_loss=0.00345, critic_loss=2.3e+13, actor_loss=-1.87e+7, temp=9.3e+4, temp_loss=-1.43e+3, mmd_loss=5.59e+3, alpha=2.2e+4] 


2025-12-06 07:39.07 [info     ] BEAR_20251206050833: epoch=200 step=200000 epoch=200 metrics={'time_sample_batch': 0.004937817335128784, 'time_algorithm_update': 0.03548504614830017, 'imitator_loss': 0.0034467784841544927, 'critic_loss': 23029650395496.45, 'actor_loss': -18715119.638, 'temp': 92980.613421875, 'temp_loss': -1404.1329607543946, 'mmd_loss': 5592.486658691406, 'alpha': 22026.466796875, 'time_step': 0.04071929407119751, 'td_error': 10603010667070.133, 'value_scale': 18664076.430846605, 'discounted_advantage': -23232439.507476147, 'initial_state': 18423166.0, 'diff_eval': 113282.04896931676} step=200000
2025-12-06 07:39.07 [info     ] Model parameters are saved to logs/d3rlpy_logs\BEAR_20251206050833\model_200000.d3
Training model:  CQL
2025-12-06 07:39.07 [info     ] dataset info                   dataset_info=DatasetInfo(observation_signature=Signature(dtype=[dtype('float64')], shape=[(7,)]), action_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), reward_signat

Epoch 1/200: 100%|██████████| 1000/1000 [00:43<00:00, 23.22it/s, critic_loss=-65.7, conservative_loss=-66.9, alpha=0.952, actor_loss=-0.848, temp=0.962, temp_loss=0.753]


2025-12-06 07:39.54 [info     ] CQL_20251206073907: epoch=1 step=1000 epoch=1 metrics={'time_sample_batch': 0.005106236457824707, 'time_algorithm_update': 0.036962885856628415, 'critic_loss': -65.67587659454345, 'conservative_loss': -66.93626169204713, 'alpha': 0.951105207502842, 'actor_loss': -0.8530453704595565, 'temp': 0.9615534249544143, 'temp_loss': 0.7504633855223656, 'time_step': 0.04236152505874634, 'td_error': 1.16004990904145, 'value_scale': 2.604065583957935, 'discounted_advantage': -1.2720999874601653, 'initial_state': 3.2653632164001465, 'diff_eval': 2982.59546614105} step=1000
2025-12-06 07:39.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_1000.d3


Epoch 2/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.40it/s, critic_loss=-61, conservative_loss=-63.1, alpha=0.862, actor_loss=-1.7, temp=0.895, temp_loss=0.416]  


2025-12-06 07:40.40 [info     ] CQL_20251206073907: epoch=2 step=2000 epoch=2 metrics={'time_sample_batch': 0.005066714525222778, 'time_algorithm_update': 0.03669072580337524, 'critic_loss': -60.97713648223877, 'conservative_loss': -63.088464389801025, 'alpha': 0.8620305636525154, 'actor_loss': -1.7036966488361358, 'temp': 0.8951029952764511, 'temp_loss': 0.41577657821774483, 'time_step': 0.04203527569770813, 'td_error': 1.2271402621519953, 'value_scale': 3.15467977737157, 'discounted_advantage': -0.7352612612228934, 'initial_state': 4.299067497253418, 'diff_eval': 2685.4165789952317} step=2000
2025-12-06 07:40.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_2000.d3


Epoch 3/200: 100%|██████████| 1000/1000 [00:43<00:00, 23.16it/s, critic_loss=-55.1, conservative_loss=-57.9, alpha=0.784, actor_loss=-1.99, temp=0.839, temp_loss=0.278]


2025-12-06 07:41.27 [info     ] CQL_20251206073907: epoch=3 step=3000 epoch=3 metrics={'time_sample_batch': 0.005205003976821899, 'time_algorithm_update': 0.037033719778060914, 'critic_loss': -55.05081683349609, 'conservative_loss': -57.84957329177856, 'alpha': 0.7840037305951119, 'actor_loss': -1.9905776559114456, 'temp': 0.8383999111056328, 'temp_loss': 0.2780813806653023, 'time_step': 0.04252293515205383, 'td_error': 1.5580570059898051, 'value_scale': 3.79569502063845, 'discounted_advantage': -0.8840120216155682, 'initial_state': 4.840728759765625, 'diff_eval': 2683.173578011264} step=3000
2025-12-06 07:41.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_3000.d3


Epoch 4/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.32it/s, critic_loss=-49.8, conservative_loss=-53, alpha=0.715, actor_loss=-2.09, temp=0.788, temp_loss=0.199] 


2025-12-06 07:42.13 [info     ] CQL_20251206073907: epoch=4 step=4000 epoch=4 metrics={'time_sample_batch': 0.005155014276504517, 'time_algorithm_update': 0.03680142617225647, 'critic_loss': -49.740697383880615, 'conservative_loss': -52.975010150909426, 'alpha': 0.7144267412424088, 'actor_loss': -2.09358325278759, 'temp': 0.7877562466859818, 'temp_loss': 0.198716711897403, 'time_step': 0.0422329638004303, 'td_error': 1.4876841522907511, 'value_scale': 3.4451567969766614, 'discounted_advantage': -0.657525094326032, 'initial_state': 4.357080459594727, 'diff_eval': 2518.002668849708} step=4000
2025-12-06 07:42.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_4000.d3


Epoch 5/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.41it/s, critic_loss=-45, conservative_loss=-48.5, alpha=0.652, actor_loss=-2.02, temp=0.742, temp_loss=0.144] 


2025-12-06 07:43.00 [info     ] CQL_20251206073907: epoch=5 step=5000 epoch=5 metrics={'time_sample_batch': 0.005133845329284668, 'time_algorithm_update': 0.036617581129074095, 'critic_loss': -45.001456455230716, 'conservative_loss': -48.529652477264406, 'alpha': 0.6516938327550889, 'actor_loss': -2.0213976607322692, 'temp': 0.7414739436507225, 'temp_loss': 0.14391921159718185, 'time_step': 0.04204610562324524, 'td_error': 1.6561695448835094, 'value_scale': 3.176773426176829, 'discounted_advantage': -0.7261802067122817, 'initial_state': 3.605884075164795, 'diff_eval': 2488.5129262047976} step=5000
2025-12-06 07:43.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_5000.d3


Epoch 6/200: 100%|██████████| 1000/1000 [00:43<00:00, 23.08it/s, critic_loss=-40.8, conservative_loss=-44.5, alpha=0.595, actor_loss=-1.92, temp=0.7, temp_loss=0.101] 


2025-12-06 07:43.46 [info     ] CQL_20251206073907: epoch=6 step=6000 epoch=6 metrics={'time_sample_batch': 0.005180695295333862, 'time_algorithm_update': 0.03720200157165527, 'critic_loss': -40.81578628921509, 'conservative_loss': -44.45641139602661, 'alpha': 0.5947673591971397, 'actor_loss': -1.916821717441082, 'temp': 0.699793907046318, 'temp_loss': 0.1014334927489981, 'time_step': 0.042670462846755984, 'td_error': 1.7055205746206763, 'value_scale': 3.0366402478468637, 'discounted_advantage': -0.4212307927923034, 'initial_state': 3.452326774597168, 'diff_eval': 2431.8554749274476} step=6000
2025-12-06 07:43.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_6000.d3


Epoch 7/200: 100%|██████████| 1000/1000 [00:44<00:00, 22.41it/s, critic_loss=-37, conservative_loss=-40.7, alpha=0.543, actor_loss=-1.79, temp=0.662, temp_loss=0.0724] 


2025-12-06 07:44.35 [info     ] CQL_20251206073907: epoch=7 step=7000 epoch=7 metrics={'time_sample_batch': 0.005239399671554565, 'time_algorithm_update': 0.038379640579223634, 'critic_loss': -36.961908683776855, 'conservative_loss': -40.68028364944458, 'alpha': 0.5429882707595826, 'actor_loss': -1.7847596955299379, 'temp': 0.6619276177287102, 'temp_loss': 0.07188741757208482, 'time_step': 0.043914945602416995, 'td_error': 1.6724183491680733, 'value_scale': 2.817803552789553, 'discounted_advantage': -0.07859914985886403, 'initial_state': 3.0381217002868652, 'diff_eval': 2526.391369509081} step=7000
2025-12-06 07:44.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_7000.d3


Epoch 8/200: 100%|██████████| 1000/1000 [00:43<00:00, 22.84it/s, critic_loss=-33.5, conservative_loss=-37.2, alpha=0.496, actor_loss=-1.73, temp=0.63, temp_loss=0.0463]


2025-12-06 07:45.22 [info     ] CQL_20251206073907: epoch=8 step=8000 epoch=8 metrics={'time_sample_batch': 0.005117573261260986, 'time_algorithm_update': 0.0377145721912384, 'critic_loss': -33.4691125831604, 'conservative_loss': -37.21742411804199, 'alpha': 0.49580220037698747, 'actor_loss': -1.7337925547361375, 'temp': 0.6300269889235497, 'temp_loss': 0.04643921918189153, 'time_step': 0.04311877465248108, 'td_error': 1.7352141596534205, 'value_scale': 2.8109942766859635, 'discounted_advantage': -0.23110272117051386, 'initial_state': 2.927581787109375, 'diff_eval': 2393.867965683168} step=8000
2025-12-06 07:45.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_8000.d3


Epoch 9/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.56it/s, critic_loss=-30.3, conservative_loss=-34, alpha=0.453, actor_loss=-1.7, temp=0.603, temp_loss=0.0311]  


2025-12-06 07:46.08 [info     ] CQL_20251206073907: epoch=9 step=9000 epoch=9 metrics={'time_sample_batch': 0.005023411989212036, 'time_algorithm_update': 0.03647785830497742, 'critic_loss': -30.242178750991823, 'conservative_loss': -34.00285351371765, 'alpha': 0.4527803626358509, 'actor_loss': -1.701842423737049, 'temp': 0.6032383909225464, 'temp_loss': 0.030901861144695432, 'time_step': 0.04178314638137817, 'td_error': 1.7484593545556626, 'value_scale': 2.6433705842514326, 'discounted_advantage': -0.45012796740242367, 'initial_state': 2.664217472076416, 'diff_eval': 2355.759130673323} step=9000
2025-12-06 07:46.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_9000.d3


Epoch 10/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.49it/s, critic_loss=-27.3, conservative_loss=-31.1, alpha=0.414, actor_loss=-1.68, temp=0.583, temp_loss=0.0185]


2025-12-06 07:46.54 [info     ] CQL_20251206073907: epoch=10 step=10000 epoch=10 metrics={'time_sample_batch': 0.005049459934234619, 'time_algorithm_update': 0.036564185857772824, 'critic_loss': -27.268647804260254, 'conservative_loss': -31.066531284332275, 'alpha': 0.4135265190601349, 'actor_loss': -1.6768267438411713, 'temp': 0.5830915437936783, 'temp_loss': 0.018911648372886704, 'time_step': 0.041895660877227785, 'td_error': 1.6535557606814317, 'value_scale': 2.385615646758734, 'discounted_advantage': -0.4564303105457569, 'initial_state': 2.7914810180664062, 'diff_eval': 2324.473606348249} step=10000
2025-12-06 07:46.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_10000.d3


Epoch 11/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.68it/s, critic_loss=-24.6, conservative_loss=-28.4, alpha=0.378, actor_loss=-1.69, temp=0.568, temp_loss=0.0113]


2025-12-06 07:47.40 [info     ] CQL_20251206073907: epoch=11 step=11000 epoch=11 metrics={'time_sample_batch': 0.00497750186920166, 'time_algorithm_update': 0.03629259252548218, 'critic_loss': -24.623186292648317, 'conservative_loss': -28.371215436935426, 'alpha': 0.37770256665349006, 'actor_loss': -1.6912593147754669, 'temp': 0.5683993811011314, 'temp_loss': 0.01090319548593834, 'time_step': 0.04156065344810486, 'td_error': 1.7362421798968364, 'value_scale': 2.9357897668549198, 'discounted_advantage': -0.22822209221356876, 'initial_state': 3.2395501136779785, 'diff_eval': 2196.3153722040156} step=11000
2025-12-06 07:47.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_11000.d3


Epoch 12/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.40it/s, critic_loss=-22.2, conservative_loss=-25.9, alpha=0.345, actor_loss=-1.71, temp=0.559, temp_loss=0.0063]


2025-12-06 07:48.26 [info     ] CQL_20251206073907: epoch=12 step=12000 epoch=12 metrics={'time_sample_batch': 0.005105610847473145, 'time_algorithm_update': 0.0366806902885437, 'critic_loss': -22.148003456115724, 'conservative_loss': -25.918566122055054, 'alpha': 0.3449902586936951, 'actor_loss': -1.712779828250408, 'temp': 0.5593292640447617, 'temp_loss': 0.006360662592574954, 'time_step': 0.04207763767242432, 'td_error': 1.7663441033154552, 'value_scale': 2.715857389603553, 'discounted_advantage': -0.6386899810122497, 'initial_state': 2.982231855392456, 'diff_eval': 2318.746455120215} step=12000
2025-12-06 07:48.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_12000.d3


Epoch 13/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.39it/s, critic_loss=-19.9, conservative_loss=-23.7, alpha=0.315, actor_loss=-1.7, temp=0.554, temp_loss=0.00205]  


2025-12-06 07:49.12 [info     ] CQL_20251206073907: epoch=13 step=13000 epoch=13 metrics={'time_sample_batch': 0.005084728717803955, 'time_algorithm_update': 0.036739086866378785, 'critic_loss': -19.86474385547638, 'conservative_loss': -23.681344024658202, 'alpha': 0.3151123615205288, 'actor_loss': -1.7042882243990898, 'temp': 0.5536617335677146, 'temp_loss': 0.002058464116300456, 'time_step': 0.04210221576690674, 'td_error': 1.8555427080363918, 'value_scale': 2.5915713801820766, 'discounted_advantage': -0.6626096802341755, 'initial_state': 2.4116709232330322, 'diff_eval': 2157.293142226055} step=13000
2025-12-06 07:49.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_13000.d3


Epoch 14/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.40it/s, critic_loss=-17.8, conservative_loss=-21.6, alpha=0.288, actor_loss=-1.71, temp=0.549, temp_loss=0.0044]


2025-12-06 07:49.59 [info     ] CQL_20251206073907: epoch=14 step=14000 epoch=14 metrics={'time_sample_batch': 0.005101485967636109, 'time_algorithm_update': 0.03670709800720215, 'critic_loss': -17.79061981201172, 'conservative_loss': -21.606135486602785, 'alpha': 0.2878313603103161, 'actor_loss': -1.7089591385126115, 'temp': 0.5487972805500031, 'temp_loss': 0.004317334017483517, 'time_step': 0.04208491611480713, 'td_error': 1.9173518959147668, 'value_scale': 3.2290659035311675, 'discounted_advantage': -0.5090191364950168, 'initial_state': 3.4343292713165283, 'diff_eval': 2036.690280683839} step=14000
2025-12-06 07:49.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_14000.d3


Epoch 15/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.38it/s, critic_loss=-15.9, conservative_loss=-19.7, alpha=0.263, actor_loss=-1.73, temp=0.541, temp_loss=0.00819]


2025-12-06 07:50.45 [info     ] CQL_20251206073907: epoch=15 step=15000 epoch=15 metrics={'time_sample_batch': 0.005125488519668579, 'time_algorithm_update': 0.036697572708129886, 'critic_loss': -15.847537591934204, 'conservative_loss': -19.708159963607788, 'alpha': 0.2629308883845806, 'actor_loss': -1.730400684416294, 'temp': 0.5410345727801323, 'temp_loss': 0.00806225037248805, 'time_step': 0.04210314559936523, 'td_error': 1.903910994686023, 'value_scale': 2.92864007971344, 'discounted_advantage': -0.8131119105960012, 'initial_state': 2.816865921020508, 'diff_eval': 2048.196074170749} step=15000
2025-12-06 07:50.45 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_15000.d3


Epoch 16/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.51it/s, critic_loss=-14.1, conservative_loss=-18, alpha=0.24, actor_loss=-1.75, temp=0.533, temp_loss=0.00622]  


2025-12-06 07:51.31 [info     ] CQL_20251206073907: epoch=16 step=16000 epoch=16 metrics={'time_sample_batch': 0.00508507490158081, 'time_algorithm_update': 0.036488998889923095, 'critic_loss': -14.061053297996521, 'conservative_loss': -17.982714260101318, 'alpha': 0.240192914173007, 'actor_loss': -1.7546058555841446, 'temp': 0.5327767667770386, 'temp_loss': 0.006294450125424191, 'time_step': 0.04186073279380798, 'td_error': 1.9063748823728743, 'value_scale': 3.548788956503094, 'discounted_advantage': -1.0576982784388853, 'initial_state': 4.563529014587402, 'diff_eval': 2039.1378656516229} step=16000
2025-12-06 07:51.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_16000.d3


Epoch 17/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.39it/s, critic_loss=-12.4, conservative_loss=-16.4, alpha=0.22, actor_loss=-1.81, temp=0.525, temp_loss=0.00636]


2025-12-06 07:52.17 [info     ] CQL_20251206073907: epoch=17 step=17000 epoch=17 metrics={'time_sample_batch': 0.005095580816268921, 'time_algorithm_update': 0.036712206840515134, 'critic_loss': -12.400849638938904, 'conservative_loss': -16.402842400550842, 'alpha': 0.21941896356642246, 'actor_loss': -1.8123735616207122, 'temp': 0.5249589132070541, 'temp_loss': 0.006312122479779646, 'time_step': 0.04209615564346313, 'td_error': 2.11031173078008, 'value_scale': 3.1973067534789257, 'discounted_advantage': -0.9550642242677999, 'initial_state': 2.814987897872925, 'diff_eval': 2004.907408549035} step=17000
2025-12-06 07:52.17 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_17000.d3


Epoch 18/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.85it/s, critic_loss=-10.9, conservative_loss=-15, alpha=0.201, actor_loss=-1.85, temp=0.518, temp_loss=0.00433]   


2025-12-06 07:53.03 [info     ] CQL_20251206073907: epoch=18 step=18000 epoch=18 metrics={'time_sample_batch': 0.005077313899993896, 'time_algorithm_update': 0.035935802459716795, 'critic_loss': -10.923305168151856, 'conservative_loss': -14.973858880996705, 'alpha': 0.20044419281184672, 'actor_loss': -1.8497922541499139, 'temp': 0.5182886555790901, 'temp_loss': 0.0044641540686134245, 'time_step': 0.04128520011901855, 'td_error': 2.0152326120423756, 'value_scale': 2.9872184479913586, 'discounted_advantage': -1.184345418345507, 'initial_state': 2.877870559692383, 'diff_eval': 2041.3229603863342} step=18000
2025-12-06 07:53.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_18000.d3


Epoch 19/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.58it/s, critic_loss=-9.57, conservative_loss=-13.7, alpha=0.183, actor_loss=-1.92, temp=0.511, temp_loss=0.00497] 


2025-12-06 07:53.49 [info     ] CQL_20251206073907: epoch=19 step=19000 epoch=19 metrics={'time_sample_batch': 0.005071611166000366, 'time_algorithm_update': 0.03641387891769409, 'critic_loss': -9.560853663921357, 'conservative_loss': -13.645990175247192, 'alpha': 0.18311258633434774, 'actor_loss': -1.9245672730207444, 'temp': 0.511260857462883, 'temp_loss': 0.005181048576021567, 'time_step': 0.0417585220336914, 'td_error': 2.0899223634085153, 'value_scale': 3.4600181268607733, 'discounted_advantage': -1.3139898078710757, 'initial_state': 3.8936514854431152, 'diff_eval': 1982.5894707990967} step=19000
2025-12-06 07:53.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_19000.d3


Epoch 20/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.73it/s, critic_loss=-8.32, conservative_loss=-12.5, alpha=0.167, actor_loss=-1.99, temp=0.505, temp_loss=0.00577]


2025-12-06 07:54.34 [info     ] CQL_20251206073907: epoch=20 step=20000 epoch=20 metrics={'time_sample_batch': 0.0049884445667266845, 'time_algorithm_update': 0.03624667525291443, 'critic_loss': -8.31846261358261, 'conservative_loss': -12.44971994972229, 'alpha': 0.16728342767059803, 'actor_loss': -1.9938960976600646, 'temp': 0.5048552971482277, 'temp_loss': 0.005561299638589844, 'time_step': 0.04150510144233704, 'td_error': 2.0953280601724917, 'value_scale': 3.260360920676854, 'discounted_advantage': -1.566877787768605, 'initial_state': 3.3776376247406006, 'diff_eval': 2008.8255052095894} step=20000
2025-12-06 07:54.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_20000.d3


Epoch 21/200: 100%|██████████| 1000/1000 [00:43<00:00, 22.95it/s, critic_loss=-7.21, conservative_loss=-11.4, alpha=0.153, actor_loss=-2.07, temp=0.498, temp_loss=0.00513]


2025-12-06 07:55.21 [info     ] CQL_20251206073907: epoch=21 step=21000 epoch=21 metrics={'time_sample_batch': 0.005134270668029785, 'time_algorithm_update': 0.03747382950782776, 'critic_loss': -7.209163343429565, 'conservative_loss': -11.352583138465882, 'alpha': 0.15282541267573835, 'actor_loss': -2.067816692829132, 'temp': 0.4976089082956314, 'temp_loss': 0.005129080577287823, 'time_step': 0.04288853693008423, 'td_error': 2.184604712028787, 'value_scale': 3.7812298409982716, 'discounted_advantage': -1.4627229766799235, 'initial_state': 4.245335578918457, 'diff_eval': 1962.61027852504} step=21000
2025-12-06 07:55.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_21000.d3


Epoch 22/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.48it/s, critic_loss=-6.2, conservative_loss=-10.4, alpha=0.14, actor_loss=-2.14, temp=0.491, temp_loss=0.00496]   


2025-12-06 07:56.07 [info     ] CQL_20251206073907: epoch=22 step=22000 epoch=22 metrics={'time_sample_batch': 0.00501050877571106, 'time_algorithm_update': 0.036579641819000246, 'critic_loss': -6.1935597224235535, 'conservative_loss': -10.358331323623657, 'alpha': 0.1396140346825123, 'actor_loss': -2.135019461393356, 'temp': 0.4905003986954689, 'temp_loss': 0.005093864869093522, 'time_step': 0.0418804018497467, 'td_error': 2.2926531108651407, 'value_scale': 4.1080168489084175, 'discounted_advantage': -1.6331485606918228, 'initial_state': 4.37346076965332, 'diff_eval': 1981.0817990505207} step=22000
2025-12-06 07:56.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_22000.d3


Epoch 23/200: 100%|██████████| 1000/1000 [00:44<00:00, 22.50it/s, critic_loss=-5.29, conservative_loss=-9.44, alpha=0.128, actor_loss=-2.22, temp=0.483, temp_loss=0.00758]


2025-12-06 07:56.55 [info     ] CQL_20251206073907: epoch=23 step=23000 epoch=23 metrics={'time_sample_batch': 0.005700635194778442, 'time_algorithm_update': 0.03779187345504761, 'critic_loss': -5.286514444351196, 'conservative_loss': -9.43832592010498, 'alpha': 0.1275461871549487, 'actor_loss': -2.2224769334793093, 'temp': 0.48279708993434906, 'temp_loss': 0.00743316586012952, 'time_step': 0.04377370882034302, 'td_error': 2.471747966659253, 'value_scale': 3.7484706725335033, 'discounted_advantage': -1.9189532330386878, 'initial_state': 3.383174419403076, 'diff_eval': 1911.7070153791224} step=23000
2025-12-06 07:56.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_23000.d3


Epoch 24/200: 100%|██████████| 1000/1000 [00:44<00:00, 22.31it/s, critic_loss=-4.35, conservative_loss=-8.61, alpha=0.117, actor_loss=-2.32, temp=0.474, temp_loss=0.00383]


2025-12-06 07:57.45 [info     ] CQL_20251206073907: epoch=24 step=24000 epoch=24 metrics={'time_sample_batch': 0.006231888771057129, 'time_algorithm_update': 0.03760763263702393, 'critic_loss': -4.350665810108185, 'conservative_loss': -8.605563269615173, 'alpha': 0.11652715508639812, 'actor_loss': -2.3193525205850603, 'temp': 0.4743414555490017, 'temp_loss': 0.0037706534205935896, 'time_step': 0.044141395568847656, 'td_error': 2.218482616646725, 'value_scale': 4.207572354993659, 'discounted_advantage': -1.8914264183508844, 'initial_state': 5.229250431060791, 'diff_eval': 1898.982404142993} step=24000
2025-12-06 07:57.45 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_24000.d3


Epoch 25/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.24it/s, critic_loss=-3.5, conservative_loss=-7.84, alpha=0.107, actor_loss=-2.44, temp=0.468, temp_loss=0.00589]


2025-12-06 07:58.29 [info     ] CQL_20251206073907: epoch=25 step=25000 epoch=25 metrics={'time_sample_batch': 0.004855087280273437, 'time_algorithm_update': 0.0354150698184967, 'critic_loss': -3.4981309797763824, 'conservative_loss': -7.840331685066223, 'alpha': 0.10645979078859091, 'actor_loss': -2.444851356625557, 'temp': 0.46764704301953314, 'temp_loss': 0.006052949576289393, 'time_step': 0.04055125832557678, 'td_error': 2.359901135140898, 'value_scale': 4.277692353449706, 'discounted_advantage': -2.015512657525399, 'initial_state': 4.678842067718506, 'diff_eval': 1884.4764037524208} step=25000
2025-12-06 07:58.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_25000.d3


Epoch 26/200: 100%|██████████| 1000/1000 [00:43<00:00, 23.22it/s, critic_loss=-2.75, conservative_loss=-7.15, alpha=0.0973, actor_loss=-2.52, temp=0.46, temp_loss=0.00764]


2025-12-06 07:59.16 [info     ] CQL_20251206073907: epoch=26 step=26000 epoch=26 metrics={'time_sample_batch': 0.005071726799011231, 'time_algorithm_update': 0.036994666814804074, 'critic_loss': -2.7445236704349516, 'conservative_loss': -7.143864199638367, 'alpha': 0.09726483406871557, 'actor_loss': -2.5238208047151565, 'temp': 0.4601493581831455, 'temp_loss': 0.007593876394908875, 'time_step': 0.042360278606414795, 'td_error': 2.660225172519676, 'value_scale': 4.514566818605062, 'discounted_advantage': -2.2776255496362596, 'initial_state': 4.194532871246338, 'diff_eval': 1869.3323923563612} step=26000
2025-12-06 07:59.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_26000.d3


Epoch 27/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.53it/s, critic_loss=-2.07, conservative_loss=-6.51, alpha=0.0889, actor_loss=-2.66, temp=0.451, temp_loss=0.00613]


2025-12-06 08:00.02 [info     ] CQL_20251206073907: epoch=27 step=27000 epoch=27 metrics={'time_sample_batch': 0.0050805058479309086, 'time_algorithm_update': 0.03644778275489807, 'critic_loss': -2.065149458169937, 'conservative_loss': -6.508498661518097, 'alpha': 0.08886635075509548, 'actor_loss': -2.656607334494591, 'temp': 0.4508515453636646, 'temp_loss': 0.006337258379673585, 'time_step': 0.04181503343582153, 'td_error': 2.5306500082289642, 'value_scale': 4.972887832457114, 'discounted_advantage': -2.4327762515531703, 'initial_state': 5.583893299102783, 'diff_eval': 1872.2646818338976} step=27000
2025-12-06 08:00.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_27000.d3


Epoch 28/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.48it/s, critic_loss=-1.35, conservative_loss=-5.93, alpha=0.0812, actor_loss=-2.75, temp=0.443, temp_loss=0.00613]


2025-12-06 08:00.48 [info     ] CQL_20251206073907: epoch=28 step=28000 epoch=28 metrics={'time_sample_batch': 0.005097513198852539, 'time_algorithm_update': 0.03652939486503601, 'critic_loss': -1.3435174815654756, 'conservative_loss': -5.9321926693916325, 'alpha': 0.08119318109750748, 'actor_loss': -2.7502544212341307, 'temp': 0.4426749899983406, 'temp_loss': 0.006316357642877847, 'time_step': 0.04191574883460999, 'td_error': 2.6947716774849617, 'value_scale': 4.516530767324, 'discounted_advantage': -2.696731877400098, 'initial_state': 4.5871686935424805, 'diff_eval': 1904.7045622096207} step=28000
2025-12-06 08:00.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_28000.d3


Epoch 29/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.46it/s, critic_loss=-0.682, conservative_loss=-5.4, alpha=0.0742, actor_loss=-2.91, temp=0.435, temp_loss=0.00724]


2025-12-06 08:01.34 [info     ] CQL_20251206073907: epoch=29 step=29000 epoch=29 metrics={'time_sample_batch': 0.0050752322673797605, 'time_algorithm_update': 0.03653314471244812, 'critic_loss': -0.68149929022789, 'conservative_loss': -5.4009274969100955, 'alpha': 0.07418367134779692, 'actor_loss': -2.9102373976707456, 'temp': 0.4350038926303387, 'temp_loss': 0.007237607820658013, 'time_step': 0.04191255497932434, 'td_error': 2.6590580848863796, 'value_scale': 4.918261156410404, 'discounted_advantage': -2.6640332039351176, 'initial_state': 5.489799976348877, 'diff_eval': 1822.381888786924} step=29000
2025-12-06 08:01.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_29000.d3


Epoch 30/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.82it/s, critic_loss=-0.125, conservative_loss=-4.92, alpha=0.0678, actor_loss=-3.04, temp=0.426, temp_loss=0.00447]


2025-12-06 08:02.20 [info     ] CQL_20251206073907: epoch=30 step=30000 epoch=30 metrics={'time_sample_batch': 0.004941376209259033, 'time_algorithm_update': 0.036051507234573364, 'critic_loss': -0.1253795554637909, 'conservative_loss': -4.9214888567924495, 'alpha': 0.06778156148642302, 'actor_loss': -3.039187609195709, 'temp': 0.42644824981689455, 'temp_loss': 0.004501852774526924, 'time_step': 0.04127860426902771, 'td_error': 2.7077169815047646, 'value_scale': 5.148920276320921, 'discounted_advantage': -3.183096466590659, 'initial_state': 6.151510238647461, 'diff_eval': 1766.0507090486037} step=30000
2025-12-06 08:02.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_30000.d3


Epoch 31/200: 100%|██████████| 1000/1000 [00:43<00:00, 23.11it/s, critic_loss=0.563, conservative_loss=-4.48, alpha=0.062, actor_loss=-3.21, temp=0.421, temp_loss=0.00378]


2025-12-06 08:03.07 [info     ] CQL_20251206073907: epoch=31 step=31000 epoch=31 metrics={'time_sample_batch': 0.005160250902175903, 'time_algorithm_update': 0.03712527227401734, 'critic_loss': 0.5688403367996215, 'conservative_loss': -4.480798804283142, 'alpha': 0.061930862560868265, 'actor_loss': -3.212364456653595, 'temp': 0.42107843592762945, 'temp_loss': 0.0036974980536615474, 'time_step': 0.04256819820404053, 'td_error': 2.7637616623954657, 'value_scale': 5.448376249403311, 'discounted_advantage': -3.045368678723241, 'initial_state': 6.169963359832764, 'diff_eval': 1842.2895642351777} step=31000
2025-12-06 08:03.07 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_31000.d3


Epoch 32/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.66it/s, critic_loss=1.06, conservative_loss=-4.08, alpha=0.0566, actor_loss=-3.33, temp=0.416, temp_loss=0.007]  


2025-12-06 08:03.52 [info     ] CQL_20251206073907: epoch=32 step=32000 epoch=32 metrics={'time_sample_batch': 0.004966145992279053, 'time_algorithm_update': 0.03630437564849853, 'critic_loss': 1.0605916695594788, 'conservative_loss': -4.0749861431121825, 'alpha': 0.05658856026828289, 'actor_loss': -3.3274902272224427, 'temp': 0.415860816180706, 'temp_loss': 0.006994408678030595, 'time_step': 0.041564361572265625, 'td_error': 2.9447267632955425, 'value_scale': 5.906158634948592, 'discounted_advantage': -3.5091065810636954, 'initial_state': 6.591010570526123, 'diff_eval': 1871.7258875135383} step=32000
2025-12-06 08:03.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_32000.d3


Epoch 33/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.29it/s, critic_loss=1.57, conservative_loss=-3.71, alpha=0.0517, actor_loss=-3.51, temp=0.408, temp_loss=0.00374]


2025-12-06 08:04.39 [info     ] CQL_20251206073907: epoch=33 step=33000 epoch=33 metrics={'time_sample_batch': 0.005115951061248779, 'time_algorithm_update': 0.03684311842918396, 'critic_loss': 1.5728960075378418, 'conservative_loss': -3.711611614704132, 'alpha': 0.05170856088399887, 'actor_loss': -3.5093337185382842, 'temp': 0.4080559585094452, 'temp_loss': 0.003635525593883358, 'time_step': 0.04225209403038025, 'td_error': 3.0777506127360525, 'value_scale': 5.93760347717472, 'discounted_advantage': -3.9819232167032115, 'initial_state': 6.185300350189209, 'diff_eval': 1786.337652674161} step=33000
2025-12-06 08:04.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_33000.d3


Epoch 34/200: 100%|██████████| 1000/1000 [00:44<00:00, 22.61it/s, critic_loss=1.94, conservative_loss=-3.37, alpha=0.0473, actor_loss=-3.7, temp=0.402, temp_loss=0.00731]


2025-12-06 08:05.27 [info     ] CQL_20251206073907: epoch=34 step=34000 epoch=34 metrics={'time_sample_batch': 0.005012816190719605, 'time_algorithm_update': 0.03823802757263184, 'critic_loss': 1.9408883998394013, 'conservative_loss': -3.3733230690956115, 'alpha': 0.04725076898187399, 'actor_loss': -3.6992792026996613, 'temp': 0.40212597346305845, 'temp_loss': 0.007482563729165122, 'time_step': 0.04353325629234314, 'td_error': 3.1888808122699004, 'value_scale': 6.288936757148467, 'discounted_advantage': -4.159374725020817, 'initial_state': 6.489083290100098, 'diff_eval': 1801.164510429142} step=34000
2025-12-06 08:05.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_34000.d3


Epoch 35/200: 100%|██████████| 1000/1000 [00:43<00:00, 22.91it/s, critic_loss=2.51, conservative_loss=-3.07, alpha=0.0432, actor_loss=-3.89, temp=0.395, temp_loss=0.00369] 


2025-12-06 08:06.14 [info     ] CQL_20251206073907: epoch=35 step=35000 epoch=35 metrics={'time_sample_batch': 0.005135688781738281, 'time_algorithm_update': 0.03757615041732788, 'critic_loss': 2.518350691318512, 'conservative_loss': -3.068761341094971, 'alpha': 0.04317967262491584, 'actor_loss': -3.889242179393768, 'temp': 0.3953042255342007, 'temp_loss': 0.0037600108669139445, 'time_step': 0.04300062417984009, 'td_error': 3.0715102423220952, 'value_scale': 6.592616785808692, 'discounted_advantage': -4.176779204798317, 'initial_state': 7.387994289398193, 'diff_eval': 1831.2666370961729} step=35000
2025-12-06 08:06.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_35000.d3


Epoch 36/200: 100%|██████████| 1000/1000 [00:45<00:00, 21.99it/s, critic_loss=3.09, conservative_loss=-2.79, alpha=0.0395, actor_loss=-4.08, temp=0.389, temp_loss=0.00604]


2025-12-06 08:07.03 [info     ] CQL_20251206073907: epoch=36 step=36000 epoch=36 metrics={'time_sample_batch': 0.004942506790161133, 'time_algorithm_update': 0.0395717625617981, 'critic_loss': 3.0829018561840056, 'conservative_loss': -2.7906970903873445, 'alpha': 0.03945805510506034, 'actor_loss': -4.083588171243668, 'temp': 0.38875440979003906, 'temp_loss': 0.005878950549406, 'time_step': 0.0447994601726532, 'td_error': 3.111639810768376, 'value_scale': 6.803090646733395, 'discounted_advantage': -4.8654428761430095, 'initial_state': 8.55517292022705, 'diff_eval': 1814.3156049776828} step=36000
2025-12-06 08:07.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_36000.d3


Epoch 37/200: 100%|██████████| 1000/1000 [00:43<00:00, 22.87it/s, critic_loss=3.51, conservative_loss=-2.54, alpha=0.0361, actor_loss=-4.31, temp=0.382, temp_loss=0.00342]


2025-12-06 08:07.50 [info     ] CQL_20251206073907: epoch=37 step=37000 epoch=37 metrics={'time_sample_batch': 0.0052371563911437985, 'time_algorithm_update': 0.03745056056976318, 'critic_loss': 3.5083546130657197, 'conservative_loss': -2.5382137227058412, 'alpha': 0.03605997763574123, 'actor_loss': -4.307675671339035, 'temp': 0.3819641097486019, 'temp_loss': 0.002921909137396142, 'time_step': 0.042989621639251706, 'td_error': 3.492382590232354, 'value_scale': 6.618386767820236, 'discounted_advantage': -4.981562602565111, 'initial_state': 6.566308498382568, 'diff_eval': 1856.965341475297} step=37000
2025-12-06 08:07.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_37000.d3


Epoch 38/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.05it/s, critic_loss=4.03, conservative_loss=-2.31, alpha=0.033, actor_loss=-4.56, temp=0.38, temp_loss=0.00132] 


2025-12-06 08:08.35 [info     ] CQL_20251206073907: epoch=38 step=38000 epoch=38 metrics={'time_sample_batch': 0.004888745784759521, 'time_algorithm_update': 0.035701063871383665, 'critic_loss': 4.03237402844429, 'conservative_loss': -2.3065869913101196, 'alpha': 0.03295419555157423, 'actor_loss': -4.555611943006515, 'temp': 0.3796097888052464, 'temp_loss': 0.001428440454066731, 'time_step': 0.04087841033935547, 'td_error': 3.4339586540310547, 'value_scale': 7.080471722510614, 'discounted_advantage': -4.937542559537247, 'initial_state': 7.922677040100098, 'diff_eval': 1801.91732288059} step=38000
2025-12-06 08:08.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_38000.d3


Epoch 39/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.50it/s, critic_loss=4.32, conservative_loss=-2.1, alpha=0.0301, actor_loss=-4.77, temp=0.376, temp_loss=0.00499] 


2025-12-06 08:09.21 [info     ] CQL_20251206073907: epoch=39 step=39000 epoch=39 metrics={'time_sample_batch': 0.005073279142379761, 'time_algorithm_update': 0.036500142812728884, 'critic_loss': 4.324624335765838, 'conservative_loss': -2.0973736695051195, 'alpha': 0.030115688398480417, 'actor_loss': -4.764313419342041, 'temp': 0.37550506901741026, 'temp_loss': 0.004711263449746184, 'time_step': 0.04186490797996521, 'td_error': 3.561493749894127, 'value_scale': 7.194427303583435, 'discounted_advantage': -5.2770591170502374, 'initial_state': 7.333906650543213, 'diff_eval': 1974.443566201574} step=39000
2025-12-06 08:09.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_39000.d3


Epoch 40/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.63it/s, critic_loss=4.74, conservative_loss=-1.91, alpha=0.0275, actor_loss=-4.95, temp=0.372, temp_loss=0.0024] 


2025-12-06 08:10.07 [info     ] CQL_20251206073907: epoch=40 step=40000 epoch=40 metrics={'time_sample_batch': 0.005009841918945312, 'time_algorithm_update': 0.036358445644378665, 'critic_loss': 4.746718966603279, 'conservative_loss': -1.9088781840801239, 'alpha': 0.02752076949365437, 'actor_loss': -4.9516586039066315, 'temp': 0.37158153182268144, 'temp_loss': 0.002474932812852785, 'time_step': 0.04164800453186035, 'td_error': 3.403647009939865, 'value_scale': 7.341605337442829, 'discounted_advantage': -5.421338479788363, 'initial_state': 8.145218849182129, 'diff_eval': 1867.3178937184603} step=40000
2025-12-06 08:10.07 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_40000.d3


Epoch 41/200: 100%|██████████| 1000/1000 [00:43<00:00, 23.16it/s, critic_loss=4.98, conservative_loss=-1.73, alpha=0.0252, actor_loss=-5.13, temp=0.368, temp_loss=0.00365]


2025-12-06 08:10.54 [info     ] CQL_20251206073907: epoch=41 step=41000 epoch=41 metrics={'time_sample_batch': 0.0052331464290618895, 'time_algorithm_update': 0.036994194984436034, 'critic_loss': 4.984192407011986, 'conservative_loss': -1.733495779633522, 'alpha': 0.025150244805961848, 'actor_loss': -5.1297977333068845, 'temp': 0.3675644307434559, 'temp_loss': 0.003282720314105973, 'time_step': 0.04250517177581787, 'td_error': 3.514048086123396, 'value_scale': 7.363952737323446, 'discounted_advantage': -5.48638516106696, 'initial_state': 8.045230865478516, 'diff_eval': 1926.6158172448672} step=41000
2025-12-06 08:10.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_41000.d3


Epoch 42/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.35it/s, critic_loss=5.37, conservative_loss=-1.58, alpha=0.023, actor_loss=-5.29, temp=0.363, temp_loss=0.00323] 


2025-12-06 08:11.40 [info     ] CQL_20251206073907: epoch=42 step=42000 epoch=42 metrics={'time_sample_batch': 0.005141809225082398, 'time_algorithm_update': 0.03672021913528442, 'critic_loss': 5.362808839082718, 'conservative_loss': -1.5778734301328659, 'alpha': 0.022984565367922188, 'actor_loss': -5.288392464637757, 'temp': 0.3631708306670189, 'temp_loss': 0.0034128265356412157, 'time_step': 0.04214583659172058, 'td_error': 3.4966257327844823, 'value_scale': 7.897224293344117, 'discounted_advantage': -5.916456711217141, 'initial_state': 9.230891227722168, 'diff_eval': 2060.746727649285} step=42000
2025-12-06 08:11.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_42000.d3


Epoch 43/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.77it/s, critic_loss=5.72, conservative_loss=-1.44, alpha=0.021, actor_loss=-5.45, temp=0.359, temp_loss=0.00417]


2025-12-06 08:12.26 [info     ] CQL_20251206073907: epoch=43 step=43000 epoch=43 metrics={'time_sample_batch': 0.0050049455165863035, 'time_algorithm_update': 0.0360586998462677, 'critic_loss': 5.715723424196243, 'conservative_loss': -1.4358567136526108, 'alpha': 0.021004249665886165, 'actor_loss': -5.446244620323181, 'temp': 0.35868524813652036, 'temp_loss': 0.0037083202144131064, 'time_step': 0.04135637736320496, 'td_error': 3.6896783465226743, 'value_scale': 8.154872287425453, 'discounted_advantage': -5.84702794851291, 'initial_state': 8.72293472290039, 'diff_eval': 1857.3268802470702} step=43000
2025-12-06 08:12.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_43000.d3


Epoch 44/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.37it/s, critic_loss=5.93, conservative_loss=-1.31, alpha=0.0192, actor_loss=-5.6, temp=0.356, temp_loss=0.00259]  


2025-12-06 08:13.12 [info     ] CQL_20251206073907: epoch=44 step=44000 epoch=44 metrics={'time_sample_batch': 0.005127187728881836, 'time_algorithm_update': 0.03666561484336853, 'critic_loss': 5.93747985804081, 'conservative_loss': -1.3062524777650832, 'alpha': 0.01919356130436063, 'actor_loss': -5.602869819641113, 'temp': 0.3559324730336666, 'temp_loss': 0.0026283596351277085, 'time_step': 0.042090588808059694, 'td_error': 3.7098694116737714, 'value_scale': 8.013469266780158, 'discounted_advantage': -6.41067801153574, 'initial_state': 8.944772720336914, 'diff_eval': 1964.6133732780734} step=44000
2025-12-06 08:13.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_44000.d3


Epoch 45/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.63it/s, critic_loss=6.25, conservative_loss=-1.19, alpha=0.0175, actor_loss=-5.78, temp=0.35, temp_loss=0.00572] 


2025-12-06 08:13.58 [info     ] CQL_20251206073907: epoch=45 step=45000 epoch=45 metrics={'time_sample_batch': 0.005069408655166626, 'time_algorithm_update': 0.036302752256393435, 'critic_loss': 6.239621050596237, 'conservative_loss': -1.1880204563140868, 'alpha': 0.01754012319818139, 'actor_loss': -5.777633134841919, 'temp': 0.34973041427135465, 'temp_loss': 0.0058420155753847215, 'time_step': 0.0416561963558197, 'td_error': 3.6963256909025963, 'value_scale': 7.86255353883499, 'discounted_advantage': -5.932960712789582, 'initial_state': 9.373254776000977, 'diff_eval': 1944.325104877546} step=45000
2025-12-06 08:13.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_45000.d3


Epoch 46/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.37it/s, critic_loss=6.63, conservative_loss=-1.08, alpha=0.016, actor_loss=-5.93, temp=0.343, temp_loss=0.00744]


2025-12-06 08:14.45 [info     ] CQL_20251206073907: epoch=46 step=46000 epoch=46 metrics={'time_sample_batch': 0.005073966741561889, 'time_algorithm_update': 0.03677124285697937, 'critic_loss': 6.624063892841339, 'conservative_loss': -1.0796352192163468, 'alpha': 0.01602909608464688, 'actor_loss': -5.93105091381073, 'temp': 0.34252752774953843, 'temp_loss': 0.007424229964381084, 'time_step': 0.04212652635574341, 'td_error': 3.643970567999892, 'value_scale': 8.081998790319219, 'discounted_advantage': -6.06156811232694, 'initial_state': 10.324860572814941, 'diff_eval': 1959.3150713266602} step=46000
2025-12-06 08:14.45 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_46000.d3


Epoch 47/200: 100%|██████████| 1000/1000 [00:43<00:00, 23.03it/s, critic_loss=6.64, conservative_loss=-0.984, alpha=0.0147, actor_loss=-6.04, temp=0.334, temp_loss=0.00636]


2025-12-06 08:15.32 [info     ] CQL_20251206073907: epoch=47 step=47000 epoch=47 metrics={'time_sample_batch': 0.005059353351593018, 'time_algorithm_update': 0.0373606972694397, 'critic_loss': 6.639966672062874, 'conservative_loss': -0.9838137004971504, 'alpha': 0.01464771343767643, 'actor_loss': -6.043312568187714, 'temp': 0.3334699986577034, 'temp_loss': 0.006222669738461264, 'time_step': 0.042713564157485964, 'td_error': 3.64459208901808, 'value_scale': 7.9992215359389895, 'discounted_advantage': -6.421018884022169, 'initial_state': 9.38099479675293, 'diff_eval': 1970.2325070774868} step=47000
2025-12-06 08:15.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_47000.d3


Epoch 48/200: 100%|██████████| 1000/1000 [00:43<00:00, 22.91it/s, critic_loss=7.14, conservative_loss=-0.896, alpha=0.0134, actor_loss=-6.17, temp=0.328, temp_loss=0.00275] 


2025-12-06 08:16.19 [info     ] CQL_20251206073907: epoch=48 step=48000 epoch=48 metrics={'time_sample_batch': 0.00552991247177124, 'time_algorithm_update': 0.03714632368087768, 'critic_loss': 7.131197799444198, 'conservative_loss': -0.895764764547348, 'alpha': 0.013384121617302298, 'actor_loss': -6.1693910555839535, 'temp': 0.3281156245470047, 'temp_loss': 0.002689341475837864, 'time_step': 0.04296227216720581, 'td_error': 3.8083655900202156, 'value_scale': 8.755232781516849, 'discounted_advantage': -6.463120376077157, 'initial_state': 10.286645889282227, 'diff_eval': 2096.73810973395} step=48000
2025-12-06 08:16.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_48000.d3


Epoch 49/200: 100%|██████████| 1000/1000 [00:43<00:00, 23.09it/s, critic_loss=7.33, conservative_loss=-0.815, alpha=0.0122, actor_loss=-6.27, temp=0.324, temp_loss=0.00465] 


2025-12-06 08:17.06 [info     ] CQL_20251206073907: epoch=49 step=49000 epoch=49 metrics={'time_sample_batch': 0.005108274936676025, 'time_algorithm_update': 0.03723572731018066, 'critic_loss': 7.319326258182525, 'conservative_loss': -0.8142217014431954, 'alpha': 0.012230347618460655, 'actor_loss': -6.266000455379486, 'temp': 0.32382834756374357, 'temp_loss': 0.004665547171141952, 'time_step': 0.04261727356910706, 'td_error': 3.885030431438885, 'value_scale': 8.907145780837515, 'discounted_advantage': -5.95083632261415, 'initial_state': 10.957627296447754, 'diff_eval': 2133.051667072515} step=49000
2025-12-06 08:17.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_49000.d3


Epoch 50/200: 100%|██████████| 1000/1000 [00:43<00:00, 22.90it/s, critic_loss=7.59, conservative_loss=-0.74, alpha=0.0112, actor_loss=-6.37, temp=0.319, temp_loss=0.00358]


2025-12-06 08:17.53 [info     ] CQL_20251206073907: epoch=50 step=50000 epoch=50 metrics={'time_sample_batch': 0.005068957805633545, 'time_algorithm_update': 0.037631399154663084, 'critic_loss': 7.580893954873085, 'conservative_loss': -0.7396758276224137, 'alpha': 0.011177144596353173, 'actor_loss': -6.37246821308136, 'temp': 0.3188235209584236, 'temp_loss': 0.0035526188572403044, 'time_step': 0.04299251198768616, 'td_error': 4.034165338199263, 'value_scale': 8.318477112027033, 'discounted_advantage': -6.320273080197288, 'initial_state': 9.146047592163086, 'diff_eval': 2124.9256907566464} step=50000
2025-12-06 08:17.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_50000.d3


Epoch 51/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.58it/s, critic_loss=7.75, conservative_loss=-0.673, alpha=0.0102, actor_loss=-6.45, temp=0.313, temp_loss=0.00721]


2025-12-06 08:18.39 [info     ] CQL_20251206073907: epoch=51 step=51000 epoch=51 metrics={'time_sample_batch': 0.005029948472976685, 'time_algorithm_update': 0.03637496066093445, 'critic_loss': 7.74243719124794, 'conservative_loss': -0.6729147944450379, 'alpha': 0.010214589897543192, 'actor_loss': -6.453004939556122, 'temp': 0.3130153444111347, 'temp_loss': 0.007164687516051344, 'time_step': 0.04169873332977295, 'td_error': 3.812331069590631, 'value_scale': 8.369160973464592, 'discounted_advantage': -6.337493815620577, 'initial_state': 11.110119819641113, 'diff_eval': 2199.7397536014523} step=51000
2025-12-06 08:18.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_51000.d3


Epoch 52/200: 100%|██████████| 1000/1000 [00:43<00:00, 23.20it/s, critic_loss=7.95, conservative_loss=-0.613, alpha=0.00934, actor_loss=-6.53, temp=0.303, temp_loss=0.00962]


2025-12-06 08:19.26 [info     ] CQL_20251206073907: epoch=52 step=52000 epoch=52 metrics={'time_sample_batch': 0.005114576578140259, 'time_algorithm_update': 0.03702933287620545, 'critic_loss': 7.952237857580185, 'conservative_loss': -0.6129289274811744, 'alpha': 0.009333844423294067, 'actor_loss': -6.535364498138428, 'temp': 0.3029271028339863, 'temp_loss': 0.009780537849990651, 'time_step': 0.042427945852279665, 'td_error': 3.9450554375813462, 'value_scale': 8.425128591240657, 'discounted_advantage': -6.178876325921663, 'initial_state': 10.183189392089844, 'diff_eval': 2106.973870138522} step=52000
2025-12-06 08:19.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_52000.d3


Epoch 53/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.39it/s, critic_loss=8.08, conservative_loss=-0.558, alpha=0.00853, actor_loss=-6.61, temp=0.295, temp_loss=0.00462]


2025-12-06 08:20.12 [info     ] CQL_20251206073907: epoch=53 step=53000 epoch=53 metrics={'time_sample_batch': 0.0051072373390197755, 'time_algorithm_update': 0.036604358434677124, 'critic_loss': 8.073658905029298, 'conservative_loss': -0.5574281346201897, 'alpha': 0.008529600137844682, 'actor_loss': -6.605251555919647, 'temp': 0.29491737404465673, 'temp_loss': 0.004582430717186071, 'time_step': 0.0420277087688446, 'td_error': 4.076998946754096, 'value_scale': 8.39945787350957, 'discounted_advantage': -6.119111646226344, 'initial_state': 9.583580017089844, 'diff_eval': 2294.5169342160816} step=53000
2025-12-06 08:20.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_53000.d3


Epoch 54/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.65it/s, critic_loss=8.4, conservative_loss=-0.509, alpha=0.0078, actor_loss=-6.65, temp=0.289, temp_loss=0.00262]  


2025-12-06 08:20.58 [info     ] CQL_20251206073907: epoch=54 step=54000 epoch=54 metrics={'time_sample_batch': 0.005085648775100708, 'time_algorithm_update': 0.03615221881866455, 'critic_loss': 8.413284411668778, 'conservative_loss': -0.5092443180084228, 'alpha': 0.007793887332547456, 'actor_loss': -6.651056746959687, 'temp': 0.28915074992179873, 'temp_loss': 0.0027664183218730613, 'time_step': 0.04153628396987915, 'td_error': 4.164618962551553, 'value_scale': 8.299260008934189, 'discounted_advantage': -5.47133909909714, 'initial_state': 9.061933517456055, 'diff_eval': 2040.2647059532878} step=54000
2025-12-06 08:20.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_54000.d3


Epoch 55/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.48it/s, critic_loss=8.34, conservative_loss=-0.463, alpha=0.00712, actor_loss=-6.66, temp=0.286, temp_loss=0.00458]


2025-12-06 08:21.44 [info     ] CQL_20251206073907: epoch=55 step=55000 epoch=55 metrics={'time_sample_batch': 0.005016691446304321, 'time_algorithm_update': 0.03656160521507263, 'critic_loss': 8.332393500089646, 'conservative_loss': -0.46285974654555323, 'alpha': 0.007121105830185115, 'actor_loss': -6.6583892450332645, 'temp': 0.28587887150049207, 'temp_loss': 0.004594521098420955, 'time_step': 0.04187148833274841, 'td_error': 3.9984070222788923, 'value_scale': 7.945580504431834, 'discounted_advantage': -5.991976241880471, 'initial_state': 9.388293266296387, 'diff_eval': 2224.0250464156916} step=55000
2025-12-06 08:21.44 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_55000.d3


Epoch 56/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.47it/s, critic_loss=8.62, conservative_loss=-0.423, alpha=0.00651, actor_loss=-6.65, temp=0.28, temp_loss=0.0045] 


2025-12-06 08:22.30 [info     ] CQL_20251206073907: epoch=56 step=56000 epoch=56 metrics={'time_sample_batch': 0.005080770969390869, 'time_algorithm_update': 0.0365687255859375, 'critic_loss': 8.610962099075318, 'conservative_loss': -0.42239976447820665, 'alpha': 0.006506396070122719, 'actor_loss': -6.655828886032104, 'temp': 0.27964237692952154, 'temp_loss': 0.004631760091055185, 'time_step': 0.041940127611160276, 'td_error': 3.794511761767683, 'value_scale': 8.271377251489746, 'discounted_advantage': -5.57517470915198, 'initial_state': 11.728243827819824, 'diff_eval': 2267.509530710009} step=56000
2025-12-06 08:22.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_56000.d3


Epoch 57/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.69it/s, critic_loss=8.52, conservative_loss=-0.385, alpha=0.00595, actor_loss=-6.66, temp=0.275, temp_loss=0.00411]


2025-12-06 08:23.15 [info     ] CQL_20251206073907: epoch=57 step=57000 epoch=57 metrics={'time_sample_batch': 0.004976126432418823, 'time_algorithm_update': 0.03622150015830994, 'critic_loss': 8.507142285585404, 'conservative_loss': -0.38520981669425963, 'alpha': 0.005944273183122278, 'actor_loss': -6.656500821113586, 'temp': 0.2748923728466034, 'temp_loss': 0.004211105507565662, 'time_step': 0.04149025440216064, 'td_error': 3.9479064126111316, 'value_scale': 7.97358969355522, 'discounted_advantage': -5.637034506642697, 'initial_state': 10.156288146972656, 'diff_eval': 2336.8177473764504} step=57000
2025-12-06 08:23.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_57000.d3


Epoch 58/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.51it/s, critic_loss=8.64, conservative_loss=-0.35, alpha=0.00543, actor_loss=-6.64, temp=0.27, temp_loss=0.00384]   


2025-12-06 08:24.02 [info     ] CQL_20251206073907: epoch=58 step=58000 epoch=58 metrics={'time_sample_batch': 0.005077095985412597, 'time_algorithm_update': 0.036486626863479615, 'critic_loss': 8.639668047189712, 'conservative_loss': -0.349770965218544, 'alpha': 0.005431556568015367, 'actor_loss': -6.639164984703064, 'temp': 0.2700903144478798, 'temp_loss': 0.003827058132388629, 'time_step': 0.04185708141326904, 'td_error': 3.756530067078762, 'value_scale': 7.795735674352079, 'discounted_advantage': -5.993444015899726, 'initial_state': 10.445573806762695, 'diff_eval': 2343.8230086663702} step=58000
2025-12-06 08:24.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_58000.d3


Epoch 59/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.28it/s, critic_loss=8.67, conservative_loss=-0.319, alpha=0.00497, actor_loss=-6.68, temp=0.266, temp_loss=0.00291]


2025-12-06 08:24.48 [info     ] CQL_20251206073907: epoch=59 step=59000 epoch=59 metrics={'time_sample_batch': 0.005216153860092163, 'time_algorithm_update': 0.03677241373062134, 'critic_loss': 8.664067161560059, 'conservative_loss': -0.31861404186487197, 'alpha': 0.004963308354374021, 'actor_loss': -6.682181357860565, 'temp': 0.2655773304402828, 'temp_loss': 0.002695022749598138, 'time_step': 0.04227440524101257, 'td_error': 4.002092150294065, 'value_scale': 8.42825030260122, 'discounted_advantage': -6.158826876386571, 'initial_state': 11.123601913452148, 'diff_eval': 2430.0883978810584} step=59000
2025-12-06 08:24.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_59000.d3


Epoch 60/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.48it/s, critic_loss=8.77, conservative_loss=-0.291, alpha=0.00454, actor_loss=-6.77, temp=0.266, temp_loss=0.000419]


2025-12-06 08:25.34 [info     ] CQL_20251206073907: epoch=60 step=60000 epoch=60 metrics={'time_sample_batch': 0.005078770160675049, 'time_algorithm_update': 0.036555623054504396, 'critic_loss': 8.776454751491547, 'conservative_loss': -0.29102086517214776, 'alpha': 0.004534680617507547, 'actor_loss': -6.769330818653106, 'temp': 0.26569916313886643, 'temp_loss': 0.00031817095493897793, 'time_step': 0.041925199508666994, 'td_error': 4.011618254479102, 'value_scale': 8.309181831238444, 'discounted_advantage': -6.265237022976041, 'initial_state': 10.0518159866333, 'diff_eval': 2418.6740783376436} step=60000
2025-12-06 08:25.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_60000.d3


Epoch 61/200: 100%|██████████| 1000/1000 [00:44<00:00, 22.48it/s, critic_loss=8.98, conservative_loss=-0.265, alpha=0.00414, actor_loss=-6.93, temp=0.262, temp_loss=0.00458]


2025-12-06 08:26.22 [info     ] CQL_20251206073907: epoch=61 step=61000 epoch=61 metrics={'time_sample_batch': 0.005132366895675659, 'time_algorithm_update': 0.03831100392341614, 'critic_loss': 8.97583888244629, 'conservative_loss': -0.26446456207334995, 'alpha': 0.004143292397260666, 'actor_loss': -6.931443615436554, 'temp': 0.26179893508553503, 'temp_loss': 0.0045578308545518665, 'time_step': 0.04374422335624695, 'td_error': 3.8837773234166684, 'value_scale': 8.449647827437717, 'discounted_advantage': -6.3892305485983565, 'initial_state': 11.106816291809082, 'diff_eval': 2437.4432726182404} step=61000
2025-12-06 08:26.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_61000.d3


Epoch 62/200: 100%|██████████| 1000/1000 [00:43<00:00, 22.84it/s, critic_loss=9.13, conservative_loss=-0.24, alpha=0.00379, actor_loss=-7.09, temp=0.257, temp_loss=0.0033] 


2025-12-06 08:27.09 [info     ] CQL_20251206073907: epoch=62 step=62000 epoch=62 metrics={'time_sample_batch': 0.005067464351654053, 'time_algorithm_update': 0.03770240759849548, 'critic_loss': 9.128719729185104, 'conservative_loss': -0.2400455470532179, 'alpha': 0.003786412896588445, 'actor_loss': -7.090918877124786, 'temp': 0.25715618005394936, 'temp_loss': 0.0033565515810623763, 'time_step': 0.0430530309677124, 'td_error': 4.011571475427819, 'value_scale': 8.591603736913852, 'discounted_advantage': -6.591893009971557, 'initial_state': 10.955371856689453, 'diff_eval': 2501.276084306756} step=62000
2025-12-06 08:27.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_62000.d3


Epoch 63/200: 100%|██████████| 1000/1000 [00:44<00:00, 22.64it/s, critic_loss=9.36, conservative_loss=-0.218, alpha=0.00346, actor_loss=-7.3, temp=0.253, temp_loss=0.00172]


2025-12-06 08:27.57 [info     ] CQL_20251206073907: epoch=63 step=63000 epoch=63 metrics={'time_sample_batch': 0.005311422109603882, 'time_algorithm_update': 0.037823689460754394, 'critic_loss': 9.361216698884965, 'conservative_loss': -0.2180666143000126, 'alpha': 0.003460786696290597, 'actor_loss': -7.305423896312714, 'temp': 0.25309120893478393, 'temp_loss': 0.0013956971240695565, 'time_step': 0.043437021493911744, 'td_error': 4.45052634457065, 'value_scale': 8.743988501844024, 'discounted_advantage': -6.638120486838021, 'initial_state': 10.065646171569824, 'diff_eval': 2302.436815832415} step=63000
2025-12-06 08:27.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_63000.d3


Epoch 64/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.26it/s, critic_loss=9.64, conservative_loss=-0.199, alpha=0.00316, actor_loss=-7.48, temp=0.252, temp_loss=0.00201] 


2025-12-06 08:28.44 [info     ] CQL_20251206073907: epoch=64 step=64000 epoch=64 metrics={'time_sample_batch': 0.00505967903137207, 'time_algorithm_update': 0.03696199035644531, 'critic_loss': 9.629380883216857, 'conservative_loss': -0.1986029133349657, 'alpha': 0.003162859984440729, 'actor_loss': -7.483367936611176, 'temp': 0.2520367544591427, 'temp_loss': 0.00195685180707369, 'time_step': 0.042307221174240114, 'td_error': 4.253309681147961, 'value_scale': 9.147604716321093, 'discounted_advantage': -7.292674613173956, 'initial_state': 12.000406265258789, 'diff_eval': 2648.2379989966194} step=64000
2025-12-06 08:28.44 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_64000.d3


Epoch 65/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.32it/s, critic_loss=9.83, conservative_loss=-0.181, alpha=0.00289, actor_loss=-7.65, temp=0.252, temp_loss=-0.00106]


2025-12-06 08:29.30 [info     ] CQL_20251206073907: epoch=65 step=65000 epoch=65 metrics={'time_sample_batch': 0.005103875875473022, 'time_algorithm_update': 0.03682274889945984, 'critic_loss': 9.836160068511964, 'conservative_loss': -0.18108987464010715, 'alpha': 0.0028898936749901624, 'actor_loss': -7.652460811138153, 'temp': 0.2516268503367901, 'temp_loss': -0.0010835992539068684, 'time_step': 0.042212521076202394, 'td_error': 4.449927701105235, 'value_scale': 9.42091203623975, 'discounted_advantage': -7.588145269980782, 'initial_state': 12.452312469482422, 'diff_eval': 2424.773080305119} step=65000
2025-12-06 08:29.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_65000.d3


Epoch 66/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.85it/s, critic_loss=10.2, conservative_loss=-0.164, alpha=0.00264, actor_loss=-7.77, temp=0.25, temp_loss=0.0025] 


2025-12-06 08:30.15 [info     ] CQL_20251206073907: epoch=66 step=66000 epoch=66 metrics={'time_sample_batch': 0.004987619638442993, 'time_algorithm_update': 0.036008692264556885, 'critic_loss': 10.182760760545731, 'conservative_loss': -0.16414744436740875, 'alpha': 0.0026409493302926423, 'actor_loss': -7.772416184425354, 'temp': 0.2504835645854473, 'temp_loss': 0.0025406384652014824, 'time_step': 0.041279050588607785, 'td_error': 4.543465009428821, 'value_scale': 9.48634025672168, 'discounted_advantage': -7.793827793959209, 'initial_state': 12.747337341308594, 'diff_eval': 2547.8807708703644} step=66000
2025-12-06 08:30.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_66000.d3


Epoch 67/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.54it/s, critic_loss=10.4, conservative_loss=-0.15, alpha=0.00241, actor_loss=-7.91, temp=0.25, temp_loss=-0.000678]


2025-12-06 08:31.01 [info     ] CQL_20251206073907: epoch=67 step=67000 epoch=67 metrics={'time_sample_batch': 0.005076136112213135, 'time_algorithm_update': 0.03648277449607849, 'critic_loss': 10.418423275232316, 'conservative_loss': -0.14954227773845197, 'alpha': 0.0024135160245932637, 'actor_loss': -7.913095699310302, 'temp': 0.25004978331923483, 'temp_loss': -0.0005062757001142018, 'time_step': 0.04183521866798401, 'td_error': 4.789400283958543, 'value_scale': 9.977590634614035, 'discounted_advantage': -7.803104144777095, 'initial_state': 12.215435981750488, 'diff_eval': 2502.152600262876} step=67000
2025-12-06 08:31.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_67000.d3


Epoch 68/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.74it/s, critic_loss=10.6, conservative_loss=-0.137, alpha=0.00221, actor_loss=-7.97, temp=0.25, temp_loss=-0.000758]


2025-12-06 08:31.47 [info     ] CQL_20251206073907: epoch=68 step=68000 epoch=68 metrics={'time_sample_batch': 0.004991629123687744, 'time_algorithm_update': 0.03619468641281128, 'critic_loss': 10.59797712278366, 'conservative_loss': -0.13667219313979148, 'alpha': 0.0022054172006901354, 'actor_loss': -7.969194283008576, 'temp': 0.24985820515453816, 'temp_loss': -0.0006913028543349356, 'time_step': 0.0414567711353302, 'td_error': 5.223228768922555, 'value_scale': 9.426526642809657, 'discounted_advantage': -8.016049238009346, 'initial_state': 10.55362319946289, 'diff_eval': 2510.216905260035} step=68000
2025-12-06 08:31.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_68000.d3


Epoch 69/200: 100%|██████████| 1000/1000 [00:43<00:00, 23.24it/s, critic_loss=10.7, conservative_loss=-0.125, alpha=0.00202, actor_loss=-8.03, temp=0.25, temp_loss=9.49e-5]  


2025-12-06 08:32.34 [info     ] CQL_20251206073907: epoch=69 step=69000 epoch=69 metrics={'time_sample_batch': 0.005183392763137817, 'time_algorithm_update': 0.036866032361984255, 'critic_loss': 10.695535036325454, 'conservative_loss': -0.12457687432318926, 'alpha': 0.002014761149184778, 'actor_loss': -8.027152472496033, 'temp': 0.2503411857187748, 'temp_loss': 0.00025157206354197115, 'time_step': 0.04234885287284851, 'td_error': 5.380172906915027, 'value_scale': 9.8188739646693, 'discounted_advantage': -7.494126805673414, 'initial_state': 10.503244400024414, 'diff_eval': 2563.9340465188848} step=69000
2025-12-06 08:32.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_69000.d3


Epoch 70/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.45it/s, critic_loss=11.2, conservative_loss=-0.113, alpha=0.00184, actor_loss=-8.04, temp=0.249, temp_loss=0.003] 


2025-12-06 08:33.20 [info     ] CQL_20251206073907: epoch=70 step=70000 epoch=70 metrics={'time_sample_batch': 0.005073914527893067, 'time_algorithm_update': 0.0365391857624054, 'critic_loss': 11.169748548030853, 'conservative_loss': -0.11325116739422082, 'alpha': 0.0018408919397043064, 'actor_loss': -8.039692487716675, 'temp': 0.24889653943479062, 'temp_loss': 0.003153074805974029, 'time_step': 0.041923797607421875, 'td_error': 5.124432318950089, 'value_scale': 10.235704030348215, 'discounted_advantage': -8.118760117798569, 'initial_state': 11.979413986206055, 'diff_eval': 2677.197906617845} step=70000
2025-12-06 08:33.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_70000.d3


Epoch 71/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.29it/s, critic_loss=11.2, conservative_loss=-0.103, alpha=0.00168, actor_loss=-8.11, temp=0.247, temp_loss=-0.00141]


2025-12-06 08:34.06 [info     ] CQL_20251206073907: epoch=71 step=71000 epoch=71 metrics={'time_sample_batch': 0.005110301733016968, 'time_algorithm_update': 0.03684977865219116, 'critic_loss': 11.19733231973648, 'conservative_loss': -0.10331497722864151, 'alpha': 0.0016820854847319423, 'actor_loss': -8.116559637546539, 'temp': 0.2471742361187935, 'temp_loss': -0.0012417182348435744, 'time_step': 0.04226121187210083, 'td_error': 5.266660335386214, 'value_scale': 10.38672709700785, 'discounted_advantage': -7.989453084098398, 'initial_state': 11.937198638916016, 'diff_eval': 2639.8130798101515} step=71000
2025-12-06 08:34.07 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_71000.d3


Epoch 72/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.37it/s, critic_loss=11.4, conservative_loss=-0.0943, alpha=0.00154, actor_loss=-8.08, temp=0.246, temp_loss=0.0028] 


2025-12-06 08:34.53 [info     ] CQL_20251206073907: epoch=72 step=72000 epoch=72 metrics={'time_sample_batch': 0.005097326993942261, 'time_algorithm_update': 0.036695513248443605, 'critic_loss': 11.429136399030686, 'conservative_loss': -0.09421472460031509, 'alpha': 0.0015368377773556858, 'actor_loss': -8.084928981304168, 'temp': 0.24643848258256912, 'temp_loss': 0.0029550446142675357, 'time_step': 0.042076438188552855, 'td_error': 5.563427322965968, 'value_scale': 9.993325401761874, 'discounted_advantage': -8.223543265458384, 'initial_state': 11.87255859375, 'diff_eval': 2830.933338977354} step=72000
2025-12-06 08:34.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_72000.d3


Epoch 73/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.90it/s, critic_loss=11.4, conservative_loss=-0.0857, alpha=0.0014, actor_loss=-8.13, temp=0.245, temp_loss=-6.23e-5] 


2025-12-06 08:35.38 [info     ] CQL_20251206073907: epoch=73 step=73000 epoch=73 metrics={'time_sample_batch': 0.004922242164611817, 'time_algorithm_update': 0.035955508947372435, 'critic_loss': 11.39165212249756, 'conservative_loss': -0.08567160351574421, 'alpha': 0.0014041564048966394, 'actor_loss': -8.131196815013885, 'temp': 0.24470271903276444, 'temp_loss': -0.00010394508216995747, 'time_step': 0.04117580533027649, 'td_error': 5.645708503658208, 'value_scale': 10.4112107494732, 'discounted_advantage': -8.324949759655652, 'initial_state': 11.434531211853027, 'diff_eval': 2789.92826052198} step=73000
2025-12-06 08:35.38 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_73000.d3


Epoch 74/200: 100%|██████████| 1000/1000 [00:43<00:00, 22.82it/s, critic_loss=11.6, conservative_loss=-0.0781, alpha=0.00128, actor_loss=-8.17, temp=0.247, temp_loss=-0.000799]


2025-12-06 08:36.26 [info     ] CQL_20251206073907: epoch=74 step=74000 epoch=74 metrics={'time_sample_batch': 0.005163400173187256, 'time_algorithm_update': 0.037688884735107425, 'critic_loss': 11.655523245334626, 'conservative_loss': -0.07808571415394544, 'alpha': 0.0012830832271138206, 'actor_loss': -8.16853750514984, 'temp': 0.24652128857374192, 'temp_loss': -0.0006920204615453258, 'time_step': 0.04314125251770019, 'td_error': 5.533995010906312, 'value_scale': 10.55671930670885, 'discounted_advantage': -8.288711245567539, 'initial_state': 12.956660270690918, 'diff_eval': 2861.597033863407} step=74000
2025-12-06 08:36.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_74000.d3


Epoch 75/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.52it/s, critic_loss=11.7, conservative_loss=-0.0714, alpha=0.00117, actor_loss=-8.17, temp=0.245, temp_loss=9.14e-5] 


2025-12-06 08:37.12 [info     ] CQL_20251206073907: epoch=75 step=75000 epoch=75 metrics={'time_sample_batch': 0.004956024408340454, 'time_algorithm_update': 0.0366006498336792, 'critic_loss': 11.696943147182465, 'conservative_loss': -0.07139874564856291, 'alpha': 0.00117219604679849, 'actor_loss': -8.167871455192566, 'temp': 0.2452526987493038, 'temp_loss': -0.00018505882052704693, 'time_step': 0.041849725246429444, 'td_error': 5.498495940780585, 'value_scale': 10.195432639413038, 'discounted_advantage': -8.101100895255348, 'initial_state': 13.48447036743164, 'diff_eval': 2938.410944933232} step=75000
2025-12-06 08:37.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_75000.d3


Epoch 76/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.38it/s, critic_loss=12, conservative_loss=-0.0649, alpha=0.00107, actor_loss=-8.2, temp=0.244, temp_loss=0.00222]  


2025-12-06 08:37.58 [info     ] CQL_20251206073907: epoch=76 step=76000 epoch=76 metrics={'time_sample_batch': 0.005086166143417358, 'time_algorithm_update': 0.036701860666275024, 'critic_loss': 12.029622333049774, 'conservative_loss': -0.06484579910337925, 'alpha': 0.0010710341084050016, 'actor_loss': -8.202644748210908, 'temp': 0.24377736616134643, 'temp_loss': 0.002211161872372031, 'time_step': 0.04207650899887085, 'td_error': 5.547856172212412, 'value_scale': 10.38481505970843, 'discounted_advantage': -8.047896759493087, 'initial_state': 12.912042617797852, 'diff_eval': 3008.726435830066} step=76000
2025-12-06 08:37.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_76000.d3


Epoch 77/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.39it/s, critic_loss=12.1, conservative_loss=-0.0591, alpha=0.000979, actor_loss=-8.24, temp=0.242, temp_loss=0.00134]


2025-12-06 08:38.45 [info     ] CQL_20251206073907: epoch=77 step=77000 epoch=77 metrics={'time_sample_batch': 0.005165416479110718, 'time_algorithm_update': 0.036613964080810545, 'critic_loss': 12.047662247657776, 'conservative_loss': -0.059119663950055835, 'alpha': 0.000978594086074736, 'actor_loss': -8.234453227519989, 'temp': 0.24237143243849277, 'temp_loss': 0.0011808534598676487, 'time_step': 0.04206380152702332, 'td_error': 5.879275595875687, 'value_scale': 10.71960393242438, 'discounted_advantage': -7.864814933629922, 'initial_state': 13.013208389282227, 'diff_eval': 2979.5087166770027} step=77000
2025-12-06 08:38.45 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_77000.d3


Epoch 78/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.32it/s, critic_loss=12, conservative_loss=-0.0539, alpha=0.000894, actor_loss=-8.27, temp=0.24, temp_loss=0.00255]   


2025-12-06 08:39.31 [info     ] CQL_20251206073907: epoch=78 step=78000 epoch=78 metrics={'time_sample_batch': 0.005066064357757569, 'time_algorithm_update': 0.036837202548980715, 'critic_loss': 12.059105919122697, 'conservative_loss': -0.053919907737523316, 'alpha': 0.0008941365910577587, 'actor_loss': -8.269248611450195, 'temp': 0.2402938215881586, 'temp_loss': 0.0025135464570485056, 'time_step': 0.04219765567779541, 'td_error': 5.826240283468401, 'value_scale': 10.282793956543323, 'discounted_advantage': -8.198555769757277, 'initial_state': 12.50649642944336, 'diff_eval': 3044.8164608663683} step=78000
2025-12-06 08:39.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_78000.d3


Epoch 79/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.57it/s, critic_loss=11.9, conservative_loss=-0.0492, alpha=0.000817, actor_loss=-8.31, temp=0.238, temp_loss=0.000989]


2025-12-06 08:40.17 [info     ] CQL_20251206073907: epoch=79 step=79000 epoch=79 metrics={'time_sample_batch': 0.005046250104904175, 'time_algorithm_update': 0.036435826539993284, 'critic_loss': 11.876947460651397, 'conservative_loss': -0.0491506585367024, 'alpha': 0.0008169174735085107, 'actor_loss': -8.307161604881287, 'temp': 0.23787923927605154, 'temp_loss': 0.0009455183462705463, 'time_step': 0.04177530789375305, 'td_error': 5.743318226919596, 'value_scale': 10.25722697620145, 'discounted_advantage': -8.22057910554089, 'initial_state': 12.598638534545898, 'diff_eval': 2990.5714722695275} step=79000
2025-12-06 08:40.17 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_79000.d3


Epoch 80/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.49it/s, critic_loss=12, conservative_loss=-0.0448, alpha=0.000747, actor_loss=-8.34, temp=0.237, temp_loss=0.00125]   


2025-12-06 08:41.03 [info     ] CQL_20251206073907: epoch=80 step=80000 epoch=80 metrics={'time_sample_batch': 0.00510240364074707, 'time_algorithm_update': 0.03649482536315918, 'critic_loss': 11.999851856708526, 'conservative_loss': -0.04477906073257327, 'alpha': 0.0007463700659573079, 'actor_loss': -8.343162760734558, 'temp': 0.237105309009552, 'temp_loss': 0.0011891415432328358, 'time_step': 0.041882804870605465, 'td_error': 5.746285374154668, 'value_scale': 10.762136984898335, 'discounted_advantage': -8.08600259152675, 'initial_state': 14.167778015136719, 'diff_eval': 2965.592255825643} step=80000
2025-12-06 08:41.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_80000.d3


Epoch 81/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.45it/s, critic_loss=12, conservative_loss=-0.0409, alpha=0.000682, actor_loss=-8.39, temp=0.234, temp_loss=0.00149]  


2025-12-06 08:41.50 [info     ] CQL_20251206073907: epoch=81 step=81000 epoch=81 metrics={'time_sample_batch': 0.005006589651107788, 'time_algorithm_update': 0.03669320273399353, 'critic_loss': 12.00956885433197, 'conservative_loss': -0.04087196977436543, 'alpha': 0.0006819901554845273, 'actor_loss': -8.389805459022522, 'temp': 0.23438434962928295, 'temp_loss': 0.0016496679372503423, 'time_step': 0.04198872137069702, 'td_error': 6.547074374073624, 'value_scale': 10.32504223644724, 'discounted_advantage': -7.950573640010951, 'initial_state': 11.216591835021973, 'diff_eval': 3221.053276491496} step=81000
2025-12-06 08:41.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_81000.d3


Epoch 82/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.49it/s, critic_loss=12.2, conservative_loss=-0.0371, alpha=0.000623, actor_loss=-8.48, temp=0.234, temp_loss=0.00115] 


2025-12-06 08:42.36 [info     ] CQL_20251206073907: epoch=82 step=82000 epoch=82 metrics={'time_sample_batch': 0.005111215591430664, 'time_algorithm_update': 0.036455276012420657, 'critic_loss': 12.178252875566482, 'conservative_loss': -0.037120286528021094, 'alpha': 0.000623094130132813, 'actor_loss': -8.48308956670761, 'temp': 0.23356583216786383, 'temp_loss': 0.0009962699534371495, 'time_step': 0.041874433755874636, 'td_error': 5.781553376804158, 'value_scale': 10.206943483945148, 'discounted_advantage': -8.230837556440997, 'initial_state': 13.901944160461426, 'diff_eval': 3056.65139786012} step=82000
2025-12-06 08:42.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_82000.d3


Epoch 83/200: 100%|██████████| 1000/1000 [00:43<00:00, 23.07it/s, critic_loss=12.3, conservative_loss=-0.0339, alpha=0.00057, actor_loss=-8.5, temp=0.233, temp_loss=0.00112]   


2025-12-06 08:43.22 [info     ] CQL_20251206073907: epoch=83 step=83000 epoch=83 metrics={'time_sample_batch': 0.005218572378158569, 'time_algorithm_update': 0.037170191287994384, 'critic_loss': 12.325366665363312, 'conservative_loss': -0.03387994228117168, 'alpha': 0.0005693474636063911, 'actor_loss': -8.499283418655395, 'temp': 0.2325618093162775, 'temp_loss': 0.0011533518764190375, 'time_step': 0.04267015957832337, 'td_error': 5.899671762083834, 'value_scale': 10.160282107004512, 'discounted_advantage': -8.487342338730627, 'initial_state': 13.193425178527832, 'diff_eval': 3301.985752089362} step=83000
2025-12-06 08:43.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_83000.d3


Epoch 84/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.12it/s, critic_loss=12.3, conservative_loss=-0.0308, alpha=0.00052, actor_loss=-8.52, temp=0.229, temp_loss=0.00248]


2025-12-06 08:44.07 [info     ] CQL_20251206073907: epoch=84 step=84000 epoch=84 metrics={'time_sample_batch': 0.004909731149673462, 'time_algorithm_update': 0.03554623508453369, 'critic_loss': 12.278206847190857, 'conservative_loss': -0.030779707012698054, 'alpha': 0.0005202509302762337, 'actor_loss': -8.51474157333374, 'temp': 0.22914915446937084, 'temp_loss': 0.0025989864550647326, 'time_step': 0.04074174022674561, 'td_error': 5.835187142364574, 'value_scale': 10.223427622611045, 'discounted_advantage': -8.02346542533342, 'initial_state': 13.456372261047363, 'diff_eval': 3205.708626132256} step=84000
2025-12-06 08:44.07 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_84000.d3


Epoch 85/200: 100%|██████████| 1000/1000 [00:43<00:00, 23.05it/s, critic_loss=12.3, conservative_loss=-0.0281, alpha=0.000476, actor_loss=-8.47, temp=0.228, temp_loss=0.000821]


2025-12-06 08:44.54 [info     ] CQL_20251206073907: epoch=85 step=85000 epoch=85 metrics={'time_sample_batch': 0.005234398603439331, 'time_algorithm_update': 0.03713409733772278, 'critic_loss': 12.287027723789215, 'conservative_loss': -0.02807421201840043, 'alpha': 0.00047539661658811385, 'actor_loss': -8.466642477512359, 'temp': 0.22816312670707703, 'temp_loss': 0.0006697932840324938, 'time_step': 0.04267563462257385, 'td_error': 5.995510028413168, 'value_scale': 10.432405792782513, 'discounted_advantage': -8.485401700075435, 'initial_state': 12.981310844421387, 'diff_eval': 3354.4882899746376} step=85000
2025-12-06 08:44.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_85000.d3


Epoch 86/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.48it/s, critic_loss=12.5, conservative_loss=-0.0256, alpha=0.000435, actor_loss=-8.47, temp=0.226, temp_loss=0.0019] 


2025-12-06 08:45.41 [info     ] CQL_20251206073907: epoch=86 step=86000 epoch=86 metrics={'time_sample_batch': 0.00506894040107727, 'time_algorithm_update': 0.03658514738082886, 'critic_loss': 12.464631702661514, 'conservative_loss': -0.025562184024602176, 'alpha': 0.00043437173930578865, 'actor_loss': -8.469979437828064, 'temp': 0.22612404808402062, 'temp_loss': 0.001838483594590798, 'time_step': 0.04194461822509766, 'td_error': 5.899830652329352, 'value_scale': 10.442789919003033, 'discounted_advantage': -8.10147733295183, 'initial_state': 13.524273872375488, 'diff_eval': 3473.163543485877} step=86000
2025-12-06 08:45.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_86000.d3


Epoch 87/200: 100%|██████████| 1000/1000 [00:43<00:00, 23.17it/s, critic_loss=12.4, conservative_loss=-0.0234, alpha=0.000397, actor_loss=-8.55, temp=0.226, temp_loss=-0.00116]


2025-12-06 08:46.28 [info     ] CQL_20251206073907: epoch=87 step=87000 epoch=87 metrics={'time_sample_batch': 0.0052837421894073484, 'time_algorithm_update': 0.036865254878997805, 'critic_loss': 12.425879917144776, 'conservative_loss': -0.02338752038963139, 'alpha': 0.0003968550407153089, 'actor_loss': -8.553872414588929, 'temp': 0.22560178992152213, 'temp_loss': -0.0009780256868107244, 'time_step': 0.042440690994262696, 'td_error': 5.7150889922560175, 'value_scale': 10.133892429989434, 'discounted_advantage': -8.174225861872886, 'initial_state': 13.161267280578613, 'diff_eval': 3511.6853168917587} step=87000
2025-12-06 08:46.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_87000.d3


Epoch 88/200: 100%|██████████| 1000/1000 [00:43<00:00, 22.90it/s, critic_loss=12.4, conservative_loss=-0.0213, alpha=0.000363, actor_loss=-8.61, temp=0.225, temp_loss=0.00328] 


2025-12-06 08:47.15 [info     ] CQL_20251206073907: epoch=88 step=88000 epoch=88 metrics={'time_sample_batch': 0.005143216848373413, 'time_algorithm_update': 0.03760775446891785, 'critic_loss': 12.41004846572876, 'conservative_loss': -0.021261573657393455, 'alpha': 0.00036257413958082905, 'actor_loss': -8.608859757900237, 'temp': 0.22496261516213417, 'temp_loss': 0.00335164696990978, 'time_step': 0.0430252423286438, 'td_error': 5.849064235516353, 'value_scale': 10.118773555894416, 'discounted_advantage': -8.79396473070201, 'initial_state': 12.563302993774414, 'diff_eval': 3037.9908667900386} step=88000
2025-12-06 08:47.15 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_88000.d3


Epoch 89/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.93it/s, critic_loss=12.5, conservative_loss=-0.0194, alpha=0.000331, actor_loss=-8.69, temp=0.222, temp_loss=0.000689]


2025-12-06 08:48.00 [info     ] CQL_20251206073907: epoch=89 step=89000 epoch=89 metrics={'time_sample_batch': 0.0049139118194580075, 'time_algorithm_update': 0.0358864529132843, 'critic_loss': 12.460077752113342, 'conservative_loss': -0.01940379078499973, 'alpha': 0.0003312843017629348, 'actor_loss': -8.687533565044403, 'temp': 0.22208947587013245, 'temp_loss': 0.0006173386861337348, 'time_step': 0.04109230017662048, 'td_error': 5.960352289634912, 'value_scale': 10.445177846541355, 'discounted_advantage': -9.23000016492388, 'initial_state': 12.785996437072754, 'diff_eval': 3113.7569332183125} step=89000
2025-12-06 08:48.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_89000.d3


Epoch 90/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.43it/s, critic_loss=12.5, conservative_loss=-0.0176, alpha=0.000303, actor_loss=-8.85, temp=0.222, temp_loss=0.00118] 


2025-12-06 08:48.46 [info     ] CQL_20251206073907: epoch=90 step=90000 epoch=90 metrics={'time_sample_batch': 0.005138103246688843, 'time_algorithm_update': 0.03658266758918762, 'critic_loss': 12.508865925550461, 'conservative_loss': -0.017597725649364293, 'alpha': 0.0003026871416950598, 'actor_loss': -8.85508411359787, 'temp': 0.22198736335337163, 'temp_loss': 0.0012677463031141088, 'time_step': 0.04201404929161072, 'td_error': 5.9644271432070495, 'value_scale': 10.902537221774422, 'discounted_advantage': -9.041469456937408, 'initial_state': 14.1712064743042, 'diff_eval': 3323.4640829403897} step=90000
2025-12-06 08:48.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_90000.d3


Epoch 91/200: 100%|██████████| 1000/1000 [00:43<00:00, 23.11it/s, critic_loss=12.7, conservative_loss=-0.016, alpha=0.000277, actor_loss=-9.04, temp=0.22, temp_loss=-0.00171] 


2025-12-06 08:49.33 [info     ] CQL_20251206073907: epoch=91 step=91000 epoch=91 metrics={'time_sample_batch': 0.005173648357391357, 'time_algorithm_update': 0.03709210968017578, 'critic_loss': 12.680980704784393, 'conservative_loss': -0.015970773025415837, 'alpha': 0.00027669669131864794, 'actor_loss': -9.044306496620178, 'temp': 0.22039519141614436, 'temp_loss': -0.0016182224545627831, 'time_step': 0.04256128239631653, 'td_error': 6.065164429139983, 'value_scale': 10.56765903737193, 'discounted_advantage': -8.966572243679089, 'initial_state': 12.822101593017578, 'diff_eval': 3349.053422906468} step=91000
2025-12-06 08:49.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_91000.d3


Epoch 92/200: 100%|██████████| 1000/1000 [00:43<00:00, 23.13it/s, critic_loss=13, conservative_loss=-0.0146, alpha=0.000253, actor_loss=-9.17, temp=0.222, temp_loss=0.000799]  


2025-12-06 08:50.20 [info     ] CQL_20251206073907: epoch=92 step=92000 epoch=92 metrics={'time_sample_batch': 0.0052035460472106936, 'time_algorithm_update': 0.037008031129837034, 'critic_loss': 13.000861591339111, 'conservative_loss': -0.014555121677927673, 'alpha': 0.00025282283464912324, 'actor_loss': -9.171982586860656, 'temp': 0.22225196985900403, 'temp_loss': 0.0008473421420203521, 'time_step': 0.042520785808563236, 'td_error': 6.553714559536089, 'value_scale': 11.068035701461516, 'discounted_advantage': -9.337182330010261, 'initial_state': 12.619498252868652, 'diff_eval': 3295.7285651873176} step=92000
2025-12-06 08:50.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_92000.d3


Epoch 93/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.53it/s, critic_loss=13.3, conservative_loss=-0.0132, alpha=0.000231, actor_loss=-9.26, temp=0.22, temp_loss=0.00157] 


2025-12-06 08:51.06 [info     ] CQL_20251206073907: epoch=93 step=93000 epoch=93 metrics={'time_sample_batch': 0.00504168701171875, 'time_algorithm_update': 0.03647354030609131, 'critic_loss': 13.271460008621215, 'conservative_loss': -0.013149642956443131, 'alpha': 0.0002311546692799311, 'actor_loss': -9.25831612920761, 'temp': 0.2196998552531004, 'temp_loss': 0.0016021084430394694, 'time_step': 0.041799533367156985, 'td_error': 6.337007870030749, 'value_scale': 11.498051516062207, 'discounted_advantage': -9.237958339557775, 'initial_state': 14.413808822631836, 'diff_eval': 3371.265700544918} step=93000
2025-12-06 08:51.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_93000.d3


Epoch 94/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.66it/s, critic_loss=13.3, conservative_loss=-0.012, alpha=0.000211, actor_loss=-9.25, temp=0.22, temp_loss=-0.000731] 


2025-12-06 08:51.52 [info     ] CQL_20251206073907: epoch=94 step=94000 epoch=94 metrics={'time_sample_batch': 0.005023524045944214, 'time_algorithm_update': 0.03627664971351623, 'critic_loss': 13.32763987159729, 'conservative_loss': -0.012022617496550083, 'alpha': 0.00021126470649323893, 'actor_loss': -9.25112515115738, 'temp': 0.21994659443199635, 'temp_loss': -0.0007985469932900741, 'time_step': 0.0415772967338562, 'td_error': 6.646126891401835, 'value_scale': 11.547929750605544, 'discounted_advantage': -9.249710908528984, 'initial_state': 13.844551086425781, 'diff_eval': 3217.4274972146472} step=94000
2025-12-06 08:51.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_94000.d3


Epoch 95/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.34it/s, critic_loss=13.4, conservative_loss=-0.011, alpha=0.000193, actor_loss=-9.31, temp=0.222, temp_loss=-0.000529]


2025-12-06 08:52.38 [info     ] CQL_20251206073907: epoch=95 step=95000 epoch=95 metrics={'time_sample_batch': 0.005127981662750244, 'time_algorithm_update': 0.036753493785858155, 'critic_loss': 13.40604230594635, 'conservative_loss': -0.010964530657976866, 'alpha': 0.00019301502979942597, 'actor_loss': -9.314165360927582, 'temp': 0.22154833723604678, 'temp_loss': -0.0005821591331623495, 'time_step': 0.04216323351860046, 'td_error': 6.2099337307015405, 'value_scale': 11.148396507127263, 'discounted_advantage': -9.220645623464518, 'initial_state': 14.301889419555664, 'diff_eval': 3392.15362814983} step=95000
2025-12-06 08:52.38 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_95000.d3


Epoch 96/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.43it/s, critic_loss=13.4, conservative_loss=-0.0101, alpha=0.000176, actor_loss=-9.21, temp=0.221, temp_loss=-0.0016] 


2025-12-06 08:53.24 [info     ] CQL_20251206073907: epoch=96 step=96000 epoch=96 metrics={'time_sample_batch': 0.005015013694763183, 'time_algorithm_update': 0.03668966913223266, 'critic_loss': 13.416968185424805, 'conservative_loss': -0.010050044766627252, 'alpha': 0.00017633025083341636, 'actor_loss': -9.208689903736115, 'temp': 0.22121074560284615, 'temp_loss': -0.0013596970576327293, 'time_step': 0.04200063800811767, 'td_error': 6.310908559187162, 'value_scale': 11.326045230524388, 'discounted_advantage': -9.080218071010888, 'initial_state': 14.957929611206055, 'diff_eval': 3330.56910856246} step=96000
2025-12-06 08:53.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_96000.d3


Epoch 97/200: 100%|██████████| 1000/1000 [00:43<00:00, 23.15it/s, critic_loss=13.6, conservative_loss=-0.00918, alpha=0.000161, actor_loss=-9.11, temp=0.224, temp_loss=-0.00446]


2025-12-06 08:54.11 [info     ] CQL_20251206073907: epoch=97 step=97000 epoch=97 metrics={'time_sample_batch': 0.005105591773986816, 'time_algorithm_update': 0.037149813413619996, 'critic_loss': 13.65893593120575, 'conservative_loss': -0.009177456365898251, 'alpha': 0.00016105780529323966, 'actor_loss': -9.115813971042632, 'temp': 0.22439355465769767, 'temp_loss': -0.004406461990787648, 'time_step': 0.04254650354385376, 'td_error': 6.633771446491194, 'value_scale': 11.202444975839715, 'discounted_advantage': -9.17185236688183, 'initial_state': 14.834419250488281, 'diff_eval': 3481.5937000296367} step=97000
2025-12-06 08:54.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_97000.d3


Epoch 98/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.32it/s, critic_loss=13.4, conservative_loss=-0.00836, alpha=0.000147, actor_loss=-9.01, temp=0.228, temp_loss=-0.000192]


2025-12-06 08:54.58 [info     ] CQL_20251206073907: epoch=98 step=98000 epoch=98 metrics={'time_sample_batch': 0.005088342666625977, 'time_algorithm_update': 0.03678844714164734, 'critic_loss': 13.351590169906617, 'conservative_loss': -0.008357063533272594, 'alpha': 0.00014713490191206802, 'actor_loss': -9.006806615829468, 'temp': 0.22779520924389363, 'temp_loss': -0.0003105779096949846, 'time_step': 0.04217876648902893, 'td_error': 6.336928525554892, 'value_scale': 11.318268826161102, 'discounted_advantage': -9.233940164000348, 'initial_state': 16.558082580566406, 'diff_eval': 3485.331923247688} step=98000
2025-12-06 08:54.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_98000.d3


Epoch 99/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.29it/s, critic_loss=13.4, conservative_loss=-0.00762, alpha=0.000134, actor_loss=-8.91, temp=0.228, temp_loss=0.000798]


2025-12-06 08:55.45 [info     ] CQL_20251206073907: epoch=99 step=99000 epoch=99 metrics={'time_sample_batch': 0.00507395601272583, 'time_algorithm_update': 0.036885847091674806, 'critic_loss': 13.353077982902526, 'conservative_loss': -0.007617978403810411, 'alpha': 0.00013442503240366932, 'actor_loss': -8.909824480056763, 'temp': 0.22791369417309762, 'temp_loss': 0.0008610538539942354, 'time_step': 0.042255597352981566, 'td_error': 6.363926163993551, 'value_scale': 11.141706172248844, 'discounted_advantage': -9.08954703852673, 'initial_state': 15.431989669799805, 'diff_eval': 3356.849002259362} step=99000
2025-12-06 08:55.45 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_99000.d3


Epoch 100/200: 100%|██████████| 1000/1000 [00:45<00:00, 22.14it/s, critic_loss=13.3, conservative_loss=-0.00697, alpha=0.000123, actor_loss=-8.79, temp=0.227, temp_loss=0.000389]


2025-12-06 08:56.34 [info     ] CQL_20251206073907: epoch=100 step=100000 epoch=100 metrics={'time_sample_batch': 0.005222731590270996, 'time_algorithm_update': 0.03888817167282105, 'critic_loss': 13.298349390506745, 'conservative_loss': -0.006968903946690261, 'alpha': 0.00012280890528927558, 'actor_loss': -8.789458099842072, 'temp': 0.2265277301967144, 'temp_loss': 0.00040453812200576066, 'time_step': 0.04441317176818848, 'td_error': 6.56622506690884, 'value_scale': 10.910064378006217, 'discounted_advantage': -8.633370207185749, 'initial_state': 13.698802947998047, 'diff_eval': 3268.6896304189513} step=100000
2025-12-06 08:56.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_100000.d3


Epoch 101/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.56it/s, critic_loss=13.2, conservative_loss=-0.00638, alpha=0.000112, actor_loss=-8.74, temp=0.228, temp_loss=-0.00301]


2025-12-06 08:57.20 [info     ] CQL_20251206073907: epoch=101 step=101000 epoch=101 metrics={'time_sample_batch': 0.0050403492450714115, 'time_algorithm_update': 0.036443830251693725, 'critic_loss': 13.166077926635742, 'conservative_loss': -0.0063738575726747515, 'alpha': 0.00011217088127887109, 'actor_loss': -8.73449190711975, 'temp': 0.22759298582375048, 'temp_loss': -0.002919530822895467, 'time_step': 0.04178464603424072, 'td_error': 6.691729723084025, 'value_scale': 11.32768859037597, 'discounted_advantage': -8.640284077696647, 'initial_state': 14.636407852172852, 'diff_eval': 3283.869687446304} step=101000
2025-12-06 08:57.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_101000.d3


Epoch 102/200: 100%|██████████| 1000/1000 [00:44<00:00, 22.65it/s, critic_loss=13.2, conservative_loss=-0.0058, alpha=0.000103, actor_loss=-8.67, temp=0.23, temp_loss=-0.000131] 


2025-12-06 08:58.07 [info     ] CQL_20251206073907: epoch=102 step=102000 epoch=102 metrics={'time_sample_batch': 0.005019146919250489, 'time_algorithm_update': 0.038131993293762205, 'critic_loss': 13.157510463237763, 'conservative_loss': -0.005795596464537084, 'alpha': 0.00010248192426661262, 'actor_loss': -8.673406890392304, 'temp': 0.2296926165819168, 'temp_loss': 2.7608413947746158e-06, 'time_step': 0.04345476818084717, 'td_error': 6.499594500637168, 'value_scale': 10.882842878887956, 'discounted_advantage': -8.642848005910576, 'initial_state': 13.88012409210205, 'diff_eval': 3802.9341346110737} step=102000
2025-12-06 08:58.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_102000.d3


Epoch 103/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.66it/s, critic_loss=13, conservative_loss=-0.00527, alpha=9.37e-5, actor_loss=-8.74, temp=0.229, temp_loss=0.00166]  


2025-12-06 08:58.53 [info     ] CQL_20251206073907: epoch=103 step=103000 epoch=103 metrics={'time_sample_batch': 0.005015290021896363, 'time_algorithm_update': 0.03628753399848938, 'critic_loss': 12.98675963139534, 'conservative_loss': -0.005267310888506472, 'alpha': 9.364298199943733e-05, 'actor_loss': -8.741413725376129, 'temp': 0.22862238393723966, 'temp_loss': 0.0015717297517694533, 'time_step': 0.041594233512878415, 'td_error': 6.455960291368813, 'value_scale': 10.593724302188724, 'discounted_advantage': -8.566272828244335, 'initial_state': 14.318999290466309, 'diff_eval': 3512.3675805462226} step=103000
2025-12-06 08:58.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_103000.d3


Epoch 104/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.37it/s, critic_loss=12.9, conservative_loss=-0.00481, alpha=8.56e-5, actor_loss=-8.8, temp=0.229, temp_loss=-0.00196] 


2025-12-06 08:59.40 [info     ] CQL_20251206073907: epoch=104 step=104000 epoch=104 metrics={'time_sample_batch': 0.005087733030319214, 'time_algorithm_update': 0.036712767601013184, 'critic_loss': 12.908670496940612, 'conservative_loss': -0.004808203233405948, 'alpha': 8.556149923242629e-05, 'actor_loss': -8.801349746227265, 'temp': 0.2294467496573925, 'temp_loss': -0.001939623204874806, 'time_step': 0.04210171175003052, 'td_error': 6.475679563437485, 'value_scale': 11.170736342334413, 'discounted_advantage': -8.560838176560658, 'initial_state': 13.869768142700195, 'diff_eval': 3837.431840756574} step=104000
2025-12-06 08:59.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_104000.d3


Epoch 105/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.65it/s, critic_loss=13.2, conservative_loss=-0.00435, alpha=7.82e-5, actor_loss=-8.87, temp=0.229, temp_loss=0.00279]


2025-12-06 09:00.25 [info     ] CQL_20251206073907: epoch=105 step=105000 epoch=105 metrics={'time_sample_batch': 0.0050542778968811035, 'time_algorithm_update': 0.03628135108947754, 'critic_loss': 13.215338690280914, 'conservative_loss': -0.004345755700021982, 'alpha': 7.819456530705792e-05, 'actor_loss': -8.871244961738586, 'temp': 0.22851897637546062, 'temp_loss': 0.0029067944150883702, 'time_step': 0.041632978200912475, 'td_error': 6.635587956714568, 'value_scale': 11.140354280938508, 'discounted_advantage': -8.87230181035711, 'initial_state': 13.704802513122559, 'diff_eval': 3642.6165994885414} step=105000
2025-12-06 09:00.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_105000.d3


Epoch 106/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.65it/s, critic_loss=13.5, conservative_loss=-0.00394, alpha=7.15e-5, actor_loss=-8.91, temp=0.227, temp_loss=0.000652]


2025-12-06 09:01.11 [info     ] CQL_20251206073907: epoch=106 step=106000 epoch=106 metrics={'time_sample_batch': 0.004961447477340698, 'time_algorithm_update': 0.0363248438835144, 'critic_loss': 13.483094603538513, 'conservative_loss': -0.003935725677525624, 'alpha': 7.150078957056394e-05, 'actor_loss': -8.910367966651917, 'temp': 0.22666793794929982, 'temp_loss': 0.0006329975822009146, 'time_step': 0.041578553438186644, 'td_error': 6.554618123951713, 'value_scale': 10.86319567019874, 'discounted_advantage': -8.792250503856021, 'initial_state': 13.615869522094727, 'diff_eval': 3533.252005530153} step=106000
2025-12-06 09:01.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_106000.d3


Epoch 107/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.71it/s, critic_loss=13.4, conservative_loss=-0.0036, alpha=6.54e-5, actor_loss=-8.95, temp=0.226, temp_loss=-0.000413]


2025-12-06 09:01.57 [info     ] CQL_20251206073907: epoch=107 step=107000 epoch=107 metrics={'time_sample_batch': 0.005005138397216797, 'time_algorithm_update': 0.036180869579315184, 'critic_loss': 13.390620559215545, 'conservative_loss': -0.003598677387228236, 'alpha': 6.534950969216879e-05, 'actor_loss': -8.952619641304016, 'temp': 0.2261294263601303, 'temp_loss': -0.0004274882957106456, 'time_step': 0.041483458042144775, 'td_error': 6.275674503074053, 'value_scale': 11.010111782325108, 'discounted_advantage': -8.907287274915022, 'initial_state': 14.426887512207031, 'diff_eval': 3322.131950908939} step=107000
2025-12-06 09:01.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_107000.d3


Epoch 108/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.43it/s, critic_loss=13.5, conservative_loss=-0.00326, alpha=5.97e-5, actor_loss=-9.02, temp=0.225, temp_loss=0.000855]


2025-12-06 09:02.43 [info     ] CQL_20251206073907: epoch=108 step=108000 epoch=108 metrics={'time_sample_batch': 0.005124313592910767, 'time_algorithm_update': 0.03658261251449585, 'critic_loss': 13.525123474597931, 'conservative_loss': -0.0032618894251063464, 'alpha': 5.971139826215222e-05, 'actor_loss': -9.024950027942657, 'temp': 0.22541269698739053, 'temp_loss': 0.0009225130878621712, 'time_step': 0.04200387072563171, 'td_error': 6.02474745110781, 'value_scale': 10.89810896132919, 'discounted_advantage': -8.413122395885406, 'initial_state': 15.706892013549805, 'diff_eval': 3335.681298939617} step=108000
2025-12-06 09:02.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_108000.d3


Epoch 109/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.60it/s, critic_loss=13.5, conservative_loss=-0.003, alpha=5.46e-5, actor_loss=-9.1, temp=0.227, temp_loss=-0.00143]  


2025-12-06 09:03.29 [info     ] CQL_20251206073907: epoch=109 step=109000 epoch=109 metrics={'time_sample_batch': 0.005062389850616455, 'time_algorithm_update': 0.036296452283859255, 'critic_loss': 13.502529942512512, 'conservative_loss': -0.003003912918968126, 'alpha': 5.4562863133469364e-05, 'actor_loss': -9.098675750255584, 'temp': 0.2267224029302597, 'temp_loss': -0.0015138959391042591, 'time_step': 0.04165429401397705, 'td_error': 6.283238188635193, 'value_scale': 10.25886732802788, 'discounted_advantage': -8.25549542109578, 'initial_state': 14.250053405761719, 'diff_eval': 3155.4887167359166} step=109000
2025-12-06 09:03.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_109000.d3


Epoch 110/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.26it/s, critic_loss=13.7, conservative_loss=-0.0027, alpha=4.99e-5, actor_loss=-9.13, temp=0.226, temp_loss=0.00211]  


2025-12-06 09:04.15 [info     ] CQL_20251206073907: epoch=110 step=110000 epoch=110 metrics={'time_sample_batch': 0.005095217943191528, 'time_algorithm_update': 0.03681110382080078, 'critic_loss': 13.725553684234619, 'conservative_loss': -0.00269761053705588, 'alpha': 4.985521694834461e-05, 'actor_loss': -9.13547550201416, 'temp': 0.22617666579782963, 'temp_loss': 0.0021704087957041336, 'time_step': 0.04222691774368286, 'td_error': 6.288632165689508, 'value_scale': 10.004029197003208, 'discounted_advantage': -8.716047258430063, 'initial_state': 12.8908109664917, 'diff_eval': 3982.0132436467975} step=110000
2025-12-06 09:04.15 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_110000.d3


Epoch 111/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.49it/s, critic_loss=13.7, conservative_loss=-0.00246, alpha=4.56e-5, actor_loss=-9.15, temp=0.225, temp_loss=-0.000648]


2025-12-06 09:05.01 [info     ] CQL_20251206073907: epoch=111 step=111000 epoch=111 metrics={'time_sample_batch': 0.00508087968826294, 'time_algorithm_update': 0.03650794315338135, 'critic_loss': 13.744417977809906, 'conservative_loss': -0.0024617272885516287, 'alpha': 4.5583648097817784e-05, 'actor_loss': -9.148568918704987, 'temp': 0.2248620542883873, 'temp_loss': -0.000694917407934554, 'time_step': 0.04188095688819885, 'td_error': 6.735213410074097, 'value_scale': 10.515232634195888, 'discounted_advantage': -8.308071280965647, 'initial_state': 12.66139030456543, 'diff_eval': 3467.700816535536} step=111000
2025-12-06 09:05.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_111000.d3


Epoch 112/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.44it/s, critic_loss=13.6, conservative_loss=-0.00224, alpha=4.17e-5, actor_loss=-9.15, temp=0.226, temp_loss=-0.00125]


2025-12-06 09:05.47 [info     ] CQL_20251206073907: epoch=112 step=112000 epoch=112 metrics={'time_sample_batch': 0.005093291997909546, 'time_algorithm_update': 0.0365932834148407, 'critic_loss': 13.633967680931091, 'conservative_loss': -0.0022417690312722697, 'alpha': 4.165110851317877e-05, 'actor_loss': -9.154137880802155, 'temp': 0.2258631521612406, 'temp_loss': -0.0011742107382742687, 'time_step': 0.04198017287254333, 'td_error': 6.443251908811117, 'value_scale': 10.428491914907138, 'discounted_advantage': -8.332956983949897, 'initial_state': 14.70181655883789, 'diff_eval': 3864.94155611569} step=112000
2025-12-06 09:05.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_112000.d3


Epoch 113/200: 100%|██████████| 1000/1000 [00:43<00:00, 22.78it/s, critic_loss=13.9, conservative_loss=-0.00204, alpha=3.81e-5, actor_loss=-9.17, temp=0.227, temp_loss=0.000394]


2025-12-06 09:06.35 [info     ] CQL_20251206073907: epoch=113 step=113000 epoch=113 metrics={'time_sample_batch': 0.005032817125320435, 'time_algorithm_update': 0.037872713565826416, 'critic_loss': 13.857034920215607, 'conservative_loss': -0.0020426962623605505, 'alpha': 3.806140124652302e-05, 'actor_loss': -9.168777779102326, 'temp': 0.22711881425976754, 'temp_loss': 0.0005468200655886903, 'time_step': 0.04320514726638794, 'td_error': 6.224843341648517, 'value_scale': 9.95571927131643, 'discounted_advantage': -8.130174289806359, 'initial_state': 13.167804718017578, 'diff_eval': 3529.3484738731254} step=113000
2025-12-06 09:06.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_113000.d3


Epoch 114/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.65it/s, critic_loss=13.8, conservative_loss=-0.00187, alpha=3.48e-5, actor_loss=-9.18, temp=0.224, temp_loss=0.00164]


2025-12-06 09:07.21 [info     ] CQL_20251206073907: epoch=114 step=114000 epoch=114 metrics={'time_sample_batch': 0.0050489394664764405, 'time_algorithm_update': 0.036273014307022095, 'critic_loss': 13.789289630889893, 'conservative_loss': -0.001865710494457744, 'alpha': 3.4781095979269594e-05, 'actor_loss': -9.1774263048172, 'temp': 0.22443454678356647, 'temp_loss': 0.0016215797978220508, 'time_step': 0.04160880875587463, 'td_error': 6.142939889756561, 'value_scale': 10.915026447010497, 'discounted_advantage': -8.186231441676432, 'initial_state': 14.90383529663086, 'diff_eval': 3432.4035160717713} step=114000
2025-12-06 09:07.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_114000.d3


Epoch 115/200: 100%|██████████| 1000/1000 [00:43<00:00, 23.10it/s, critic_loss=13.6, conservative_loss=-0.00171, alpha=3.18e-5, actor_loss=-9.18, temp=0.224, temp_loss=0.000853]


2025-12-06 09:08.07 [info     ] CQL_20251206073907: epoch=115 step=115000 epoch=115 metrics={'time_sample_batch': 0.004947792053222657, 'time_algorithm_update': 0.03732757329940796, 'critic_loss': 13.631033215522766, 'conservative_loss': -0.0017076241930481047, 'alpha': 3.177149905786791e-05, 'actor_loss': -9.178023390293122, 'temp': 0.22389369657635688, 'temp_loss': 0.001120666512637399, 'time_step': 0.042575016021728515, 'td_error': 6.092299284128703, 'value_scale': 10.467615578907534, 'discounted_advantage': -8.018328530957753, 'initial_state': 14.493569374084473, 'diff_eval': 3569.2535434629995} step=115000
2025-12-06 09:08.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_115000.d3


Epoch 116/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.37it/s, critic_loss=13.7, conservative_loss=-0.00156, alpha=2.9e-5, actor_loss=-9.09, temp=0.223, temp_loss=-0.00172] 


2025-12-06 09:08.54 [info     ] CQL_20251206073907: epoch=116 step=116000 epoch=116 metrics={'time_sample_batch': 0.0051244466304779055, 'time_algorithm_update': 0.03669306826591492, 'critic_loss': 13.692522236824036, 'conservative_loss': -0.0015631610897835343, 'alpha': 2.9016833754212712e-05, 'actor_loss': -9.090033385753632, 'temp': 0.22307217314839364, 'temp_loss': -0.0017879967485787346, 'time_step': 0.042107154607772826, 'td_error': 5.795445888130333, 'value_scale': 9.915273215854345, 'discounted_advantage': -7.766660176362567, 'initial_state': 13.656760215759277, 'diff_eval': 3564.1588049845464} step=116000
2025-12-06 09:08.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_116000.d3


Epoch 117/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.55it/s, critic_loss=13.7, conservative_loss=-0.00143, alpha=2.65e-5, actor_loss=-9.12, temp=0.223, temp_loss=0.00201]


2025-12-06 09:09.40 [info     ] CQL_20251206073907: epoch=117 step=117000 epoch=117 metrics={'time_sample_batch': 0.004970665693283081, 'time_algorithm_update': 0.036476703643798826, 'critic_loss': 13.648721144676209, 'conservative_loss': -0.0014297927756560967, 'alpha': 2.6504464774916415e-05, 'actor_loss': -9.123888543605805, 'temp': 0.22347719214856623, 'temp_loss': 0.0020142536584753542, 'time_step': 0.04174822664260864, 'td_error': 6.502696079332172, 'value_scale': 9.903298980255876, 'discounted_advantage': -8.05219968382901, 'initial_state': 12.102004051208496, 'diff_eval': 3278.836831324677} step=117000
2025-12-06 09:09.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_117000.d3


Epoch 118/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.39it/s, critic_loss=13.7, conservative_loss=-0.0013, alpha=2.42e-5, actor_loss=-9.09, temp=0.222, temp_loss=-0.00044] 


2025-12-06 09:10.26 [info     ] CQL_20251206073907: epoch=118 step=118000 epoch=118 metrics={'time_sample_batch': 0.005085822343826294, 'time_algorithm_update': 0.03668596529960632, 'critic_loss': 13.649178682804108, 'conservative_loss': -0.00130203777377028, 'alpha': 2.4211883117459364e-05, 'actor_loss': -9.084165414333343, 'temp': 0.2219242871850729, 'temp_loss': -0.0003124829694861546, 'time_step': 0.04206543087959289, 'td_error': 6.019934795772237, 'value_scale': 10.049981155355866, 'discounted_advantage': -8.05651002124269, 'initial_state': 13.792346000671387, 'diff_eval': 3751.024399691352} step=118000
2025-12-06 09:10.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_118000.d3


Epoch 119/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.98it/s, critic_loss=13.7, conservative_loss=-0.00119, alpha=2.21e-5, actor_loss=-9.01, temp=0.223, temp_loss=-0.0025] 


2025-12-06 09:11.11 [info     ] CQL_20251206073907: epoch=119 step=119000 epoch=119 metrics={'time_sample_batch': 0.004961144924163818, 'time_algorithm_update': 0.03571486854553223, 'critic_loss': 13.673848672866821, 'conservative_loss': -0.0011902893596561625, 'alpha': 2.211949785851175e-05, 'actor_loss': -9.011362097263337, 'temp': 0.22326152366399765, 'temp_loss': -0.002492863085586578, 'time_step': 0.04098637533187866, 'td_error': 5.949644790863558, 'value_scale': 10.368632575962302, 'discounted_advantage': -7.886258503829903, 'initial_state': 15.023743629455566, 'diff_eval': 3185.376470148333} step=119000
2025-12-06 09:11.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_119000.d3


Epoch 120/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.61it/s, critic_loss=13.5, conservative_loss=-0.00109, alpha=2.02e-5, actor_loss=-8.89, temp=0.224, temp_loss=0.00204]


2025-12-06 09:11.57 [info     ] CQL_20251206073907: epoch=120 step=120000 epoch=120 metrics={'time_sample_batch': 0.005027687311172486, 'time_algorithm_update': 0.03630610466003418, 'critic_loss': 13.55048728632927, 'conservative_loss': -0.0010934976266580635, 'alpha': 2.020143911613559e-05, 'actor_loss': -8.895283715248109, 'temp': 0.22390809862315655, 'temp_loss': 0.001926983045239467, 'time_step': 0.041634668111801144, 'td_error': 6.021926741963777, 'value_scale': 10.311513264278654, 'discounted_advantage': -8.185648022944468, 'initial_state': 14.670645713806152, 'diff_eval': 3479.6235048651274} step=120000
2025-12-06 09:11.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_120000.d3


Epoch 121/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.70it/s, critic_loss=13.5, conservative_loss=-0.000989, alpha=1.85e-5, actor_loss=-8.8, temp=0.223, temp_loss=0.00151] 


2025-12-06 09:12.43 [info     ] CQL_20251206073907: epoch=121 step=121000 epoch=121 metrics={'time_sample_batch': 0.005083611965179443, 'time_algorithm_update': 0.036131913900375366, 'critic_loss': 13.441511407852174, 'conservative_loss': -0.0009890183404204435, 'alpha': 1.84535407970543e-05, 'actor_loss': -8.793287086009979, 'temp': 0.22257286831736564, 'temp_loss': 0.0016326096333796158, 'time_step': 0.04151211309432983, 'td_error': 6.200204024592068, 'value_scale': 10.154582338362358, 'discounted_advantage': -7.975343698470956, 'initial_state': 13.44708251953125, 'diff_eval': 3661.2886255111434} step=121000
2025-12-06 09:12.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_121000.d3


Epoch 122/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.42it/s, critic_loss=13.2, conservative_loss=-0.000903, alpha=1.69e-5, actor_loss=-8.73, temp=0.22, temp_loss=0.00155]  


2025-12-06 09:13.29 [info     ] CQL_20251206073907: epoch=122 step=122000 epoch=122 metrics={'time_sample_batch': 0.005131927251815796, 'time_algorithm_update': 0.03652397179603577, 'critic_loss': 13.148188750267028, 'conservative_loss': -0.0009025510564097203, 'alpha': 1.6863461649336388e-05, 'actor_loss': -8.724048615455628, 'temp': 0.22018190068006516, 'temp_loss': 0.0013558293639216572, 'time_step': 0.04196248483657837, 'td_error': 6.069434697327121, 'value_scale': 10.322672309823123, 'discounted_advantage': -8.124854990062826, 'initial_state': 13.785255432128906, 'diff_eval': 3365.0667133484876} step=122000
2025-12-06 09:13.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_122000.d3


Epoch 123/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.53it/s, critic_loss=12.9, conservative_loss=-0.000827, alpha=1.54e-5, actor_loss=-8.65, temp=0.22, temp_loss=-0.00104]


2025-12-06 09:14.15 [info     ] CQL_20251206073907: epoch=123 step=123000 epoch=123 metrics={'time_sample_batch': 0.005065915107727051, 'time_algorithm_update': 0.03646265387535095, 'critic_loss': 12.917022611618043, 'conservative_loss': -0.0008268143643508666, 'alpha': 1.5406101204462174e-05, 'actor_loss': -8.65157916879654, 'temp': 0.22044849133491515, 'temp_loss': -0.0011115606487728654, 'time_step': 0.04182926845550537, 'td_error': 6.380102468186278, 'value_scale': 10.403069217844687, 'discounted_advantage': -8.290700202781208, 'initial_state': 13.464970588684082, 'diff_eval': 4215.555828079525} step=123000
2025-12-06 09:14.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_123000.d3


Epoch 124/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.57it/s, critic_loss=12.9, conservative_loss=-0.000753, alpha=1.41e-5, actor_loss=-8.59, temp=0.22, temp_loss=0.00167] 


2025-12-06 09:15.01 [info     ] CQL_20251206073907: epoch=124 step=124000 epoch=124 metrics={'time_sample_batch': 0.00503686785697937, 'time_algorithm_update': 0.036390424013137815, 'critic_loss': 12.94114456319809, 'conservative_loss': -0.0007525200893869624, 'alpha': 1.4074642455852882e-05, 'actor_loss': -8.589192269325256, 'temp': 0.21961831219494343, 'temp_loss': 0.0015570456859422847, 'time_step': 0.04173059678077698, 'td_error': 5.959205201662704, 'value_scale': 10.149528121626846, 'discounted_advantage': -7.766518168625086, 'initial_state': 13.954855918884277, 'diff_eval': 3676.2418897262924} step=124000
2025-12-06 09:15.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_124000.d3


Epoch 125/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.37it/s, critic_loss=12.8, conservative_loss=-0.000691, alpha=1.29e-5, actor_loss=-8.46, temp=0.22, temp_loss=-0.00123] 


2025-12-06 09:15.48 [info     ] CQL_20251206073907: epoch=125 step=125000 epoch=125 metrics={'time_sample_batch': 0.005036465883255005, 'time_algorithm_update': 0.03680507087707519, 'critic_loss': 12.815912245750427, 'conservative_loss': -0.0006910148054594174, 'alpha': 1.2856717184149603e-05, 'actor_loss': -8.467771096229553, 'temp': 0.22021891053020953, 'temp_loss': -0.001141778998891823, 'time_step': 0.042126265287399295, 'td_error': 5.824698334771961, 'value_scale': 10.684181890254473, 'discounted_advantage': -7.841120604214553, 'initial_state': 15.167987823486328, 'diff_eval': 3423.568017923617} step=125000
2025-12-06 09:15.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_125000.d3


Epoch 126/200: 100%|██████████| 1000/1000 [00:43<00:00, 22.90it/s, critic_loss=12.8, conservative_loss=-0.000632, alpha=1.17e-5, actor_loss=-8.45, temp=0.221, temp_loss=0.000949]


2025-12-06 09:16.35 [info     ] CQL_20251206073907: epoch=126 step=126000 epoch=126 metrics={'time_sample_batch': 0.005022726774215698, 'time_algorithm_update': 0.03769733572006226, 'critic_loss': 12.74881030344963, 'conservative_loss': -0.0006315530193387531, 'alpha': 1.174292154792056e-05, 'actor_loss': -8.447639398097992, 'temp': 0.22052404496073724, 'temp_loss': 0.0009182992742862552, 'time_step': 0.04301895475387573, 'td_error': 5.878844967841115, 'value_scale': 10.35360794612312, 'discounted_advantage': -8.11084464303971, 'initial_state': 15.265568733215332, 'diff_eval': 3480.975604179512} step=126000
2025-12-06 09:16.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_126000.d3


Epoch 127/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.59it/s, critic_loss=12.7, conservative_loss=-0.000576, alpha=1.07e-5, actor_loss=-8.47, temp=0.22, temp_loss=-0.00167]


2025-12-06 09:17.21 [info     ] CQL_20251206073907: epoch=127 step=127000 epoch=127 metrics={'time_sample_batch': 0.005084843158721924, 'time_algorithm_update': 0.03634215617179871, 'critic_loss': 12.680611132860184, 'conservative_loss': -0.0005762273279833608, 'alpha': 1.0725286633714859e-05, 'actor_loss': -8.470702385425568, 'temp': 0.22021500024199486, 'temp_loss': -0.0016676388967316598, 'time_step': 0.04171471643447876, 'td_error': 6.060853630939155, 'value_scale': 9.808423795149492, 'discounted_advantage': -8.282831681518832, 'initial_state': 13.159758567810059, 'diff_eval': 3540.1167026779367} step=127000
2025-12-06 09:17.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_127000.d3


Epoch 128/200: 100%|██████████| 1000/1000 [00:43<00:00, 23.13it/s, critic_loss=12.4, conservative_loss=-0.000526, alpha=9.8e-6, actor_loss=-8.45, temp=0.222, temp_loss=-0.000209]


2025-12-06 09:18.08 [info     ] CQL_20251206073907: epoch=128 step=128000 epoch=128 metrics={'time_sample_batch': 0.005167088985443115, 'time_algorithm_update': 0.037057906150817874, 'critic_loss': 12.39579523730278, 'conservative_loss': -0.0005260152222181204, 'alpha': 9.798220849916105e-06, 'actor_loss': -8.451795693397521, 'temp': 0.2216458878815174, 'temp_loss': -0.00012393636966589837, 'time_step': 0.042534339427948, 'td_error': 6.39524220874059, 'value_scale': 9.829349403581071, 'discounted_advantage': -8.319842256188656, 'initial_state': 12.326001167297363, 'diff_eval': 3528.6644764225116} step=128000
2025-12-06 09:18.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_128000.d3


Epoch 129/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.47it/s, critic_loss=12.4, conservative_loss=-0.000474, alpha=8.96e-6, actor_loss=-8.52, temp=0.221, temp_loss=0.000256]


2025-12-06 09:18.54 [info     ] CQL_20251206073907: epoch=129 step=129000 epoch=129 metrics={'time_sample_batch': 0.00497830581665039, 'time_algorithm_update': 0.03666752982139587, 'critic_loss': 12.435055260181427, 'conservative_loss': -0.0004743846653436776, 'alpha': 8.955127586887102e-06, 'actor_loss': -8.516708636283875, 'temp': 0.2214598043113947, 'temp_loss': 0.0003045571704860777, 'time_step': 0.04193934392929077, 'td_error': 5.80752187789587, 'value_scale': 10.633940153158045, 'discounted_advantage': -8.16782413716561, 'initial_state': 15.314018249511719, 'diff_eval': 3684.7690736889413} step=129000
2025-12-06 09:18.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_129000.d3


Epoch 130/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.09it/s, critic_loss=12.6, conservative_loss=-0.000428, alpha=8.19e-6, actor_loss=-8.63, temp=0.22, temp_loss=0.00222] 


2025-12-06 09:19.39 [info     ] CQL_20251206073907: epoch=130 step=130000 epoch=130 metrics={'time_sample_batch': 0.004884699821472168, 'time_algorithm_update': 0.03568763208389282, 'critic_loss': 12.58656964969635, 'conservative_loss': -0.00042830726658576166, 'alpha': 8.187801508029224e-06, 'actor_loss': -8.632579799175263, 'temp': 0.22013632372021674, 'temp_loss': 0.0021130246265092865, 'time_step': 0.040851250410079955, 'td_error': 6.190927544188593, 'value_scale': 9.969054373991359, 'discounted_advantage': -8.214855266452284, 'initial_state': 13.070049285888672, 'diff_eval': 3459.769618396571} step=130000
2025-12-06 09:19.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_130000.d3


Epoch 131/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.83it/s, critic_loss=12.7, conservative_loss=-0.000391, alpha=7.49e-6, actor_loss=-8.7, temp=0.218, temp_loss=-0.000423]


2025-12-06 09:20.24 [info     ] CQL_20251206073907: epoch=131 step=131000 epoch=131 metrics={'time_sample_batch': 0.004970561265945434, 'time_algorithm_update': 0.0360646026134491, 'critic_loss': 12.665394516468048, 'conservative_loss': -0.0003902498659736011, 'alpha': 7.486509195132385e-06, 'actor_loss': -8.69881976556778, 'temp': 0.21848924949765205, 'temp_loss': -0.00038632023939862847, 'time_step': 0.0413249979019165, 'td_error': 7.080524384432853, 'value_scale': 10.20893970182773, 'discounted_advantage': -8.285945037894964, 'initial_state': 11.622410774230957, 'diff_eval': 3861.320707600384} step=131000
2025-12-06 09:20.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_131000.d3


Epoch 132/200: 100%|██████████| 1000/1000 [00:43<00:00, 23.21it/s, critic_loss=12.7, conservative_loss=-0.000355, alpha=6.84e-6, actor_loss=-8.76, temp=0.218, temp_loss=0.00158]


2025-12-06 09:21.11 [info     ] CQL_20251206073907: epoch=132 step=132000 epoch=132 metrics={'time_sample_batch': 0.0050391356945037845, 'time_algorithm_update': 0.03711526989936829, 'critic_loss': 12.702200970172882, 'conservative_loss': -0.00035507047953433355, 'alpha': 6.842266169314825e-06, 'actor_loss': -8.76257114315033, 'temp': 0.21813068847358227, 'temp_loss': 0.001580543515156023, 'time_step': 0.042426268100738525, 'td_error': 6.3166845709260135, 'value_scale': 10.633925865011637, 'discounted_advantage': -8.508613031685865, 'initial_state': 13.885893821716309, 'diff_eval': 3768.8865449996956} step=132000
2025-12-06 09:21.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_132000.d3


Epoch 133/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.74it/s, critic_loss=12.7, conservative_loss=-0.000326, alpha=6.25e-6, actor_loss=-8.8, temp=0.217, temp_loss=0.000102] 


2025-12-06 09:21.57 [info     ] CQL_20251206073907: epoch=133 step=133000 epoch=133 metrics={'time_sample_batch': 0.005015870094299316, 'time_algorithm_update': 0.03618153429031372, 'critic_loss': 12.652224763870239, 'conservative_loss': -0.0003257411355734803, 'alpha': 6.25192593133761e-06, 'actor_loss': -8.80235344362259, 'temp': 0.2171650137603283, 'temp_loss': 0.0001433663119096309, 'time_step': 0.04147892928123474, 'td_error': 6.01330518079766, 'value_scale': 10.45207576349195, 'discounted_advantage': -8.41988854907308, 'initial_state': 15.386399269104004, 'diff_eval': 3858.0726230846944} step=133000
2025-12-06 09:21.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_133000.d3


Epoch 134/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.97it/s, critic_loss=12.9, conservative_loss=-0.000294, alpha=5.72e-6, actor_loss=-8.84, temp=0.215, temp_loss=0.000827]


2025-12-06 09:22.42 [info     ] CQL_20251206073907: epoch=134 step=134000 epoch=134 metrics={'time_sample_batch': 0.004930232048034668, 'time_algorithm_update': 0.03588429713249207, 'critic_loss': 12.9026907954216, 'conservative_loss': -0.0002937008365552174, 'alpha': 5.7133948921546105e-06, 'actor_loss': -8.84075330543518, 'temp': 0.21547238908708097, 'temp_loss': 0.0006535562248900533, 'time_step': 0.04108756971359253, 'td_error': 5.835871483898109, 'value_scale': 9.982700572665296, 'discounted_advantage': -8.33125306597046, 'initial_state': 14.3363676071167, 'diff_eval': 3697.4326303511566} step=134000
2025-12-06 09:22.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_134000.d3


Epoch 135/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.48it/s, critic_loss=13, conservative_loss=-0.000269, alpha=5.22e-6, actor_loss=-8.83, temp=0.218, temp_loss=-0.0012]  


2025-12-06 09:23.28 [info     ] CQL_20251206073907: epoch=135 step=135000 epoch=135 metrics={'time_sample_batch': 0.005024741888046264, 'time_algorithm_update': 0.03661037802696228, 'critic_loss': 12.98455798482895, 'conservative_loss': -0.00026893874497909564, 'alpha': 5.222152797614399e-06, 'actor_loss': -8.834717674732207, 'temp': 0.21789359977841377, 'temp_loss': -0.0012338191606104374, 'time_step': 0.04192048692703247, 'td_error': 5.911949081687729, 'value_scale': 10.45767598862706, 'discounted_advantage': -8.329152791461032, 'initial_state': 14.622594833374023, 'diff_eval': 3687.226439263478} step=135000
2025-12-06 09:23.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_135000.d3


Epoch 136/200: 100%|██████████| 1000/1000 [00:44<00:00, 22.59it/s, critic_loss=12.9, conservative_loss=-0.000245, alpha=4.77e-6, actor_loss=-8.87, temp=0.219, temp_loss=-0.000604]


2025-12-06 09:24.16 [info     ] CQL_20251206073907: epoch=136 step=136000 epoch=136 metrics={'time_sample_batch': 0.005277025938034058, 'time_algorithm_update': 0.03803137111663819, 'critic_loss': 12.948468670606614, 'conservative_loss': -0.00024491438522818496, 'alpha': 4.77102147124242e-06, 'actor_loss': -8.867529872894288, 'temp': 0.2187493684887886, 'temp_loss': -0.0006379866851493716, 'time_step': 0.04360071444511414, 'td_error': 6.045686883201977, 'value_scale': 10.536365777614378, 'discounted_advantage': -8.090853697346803, 'initial_state': 14.1522798538208, 'diff_eval': 3429.1578695085836} step=136000
2025-12-06 09:24.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_136000.d3


Epoch 137/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.67it/s, critic_loss=13.2, conservative_loss=-0.000223, alpha=4.36e-6, actor_loss=-8.93, temp=0.218, temp_loss=-0.00144]


2025-12-06 09:25.01 [info     ] CQL_20251206073907: epoch=137 step=137000 epoch=137 metrics={'time_sample_batch': 0.005051508903503418, 'time_algorithm_update': 0.03625490474700928, 'critic_loss': 13.146650493144989, 'conservative_loss': -0.00022304748150054365, 'alpha': 4.359872403256304e-06, 'actor_loss': -8.923171601772308, 'temp': 0.21811620575189591, 'temp_loss': -0.0013644689760403707, 'time_step': 0.04158614039421082, 'td_error': 6.227892794953632, 'value_scale': 10.231090246099425, 'discounted_advantage': -7.837181430471516, 'initial_state': 14.200525283813477, 'diff_eval': 3605.572693138247} step=137000
2025-12-06 09:25.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_137000.d3


Epoch 138/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.80it/s, critic_loss=13.2, conservative_loss=-0.000202, alpha=3.99e-6, actor_loss=-8.94, temp=0.218, temp_loss=0.00254]


2025-12-06 09:25.47 [info     ] CQL_20251206073907: epoch=138 step=138000 epoch=138 metrics={'time_sample_batch': 0.0051881585121154785, 'time_algorithm_update': 0.03588078880310058, 'critic_loss': 13.13659726524353, 'conservative_loss': -0.00020204635310801678, 'alpha': 3.985335718880378e-06, 'actor_loss': -8.942316321849823, 'temp': 0.21847684766352177, 'temp_loss': 0.002521074530552141, 'time_step': 0.04134294891357422, 'td_error': 6.136355551460064, 'value_scale': 10.508461185341837, 'discounted_advantage': -8.280940702667912, 'initial_state': 14.753174781799316, 'diff_eval': 3843.152178359741} step=138000
2025-12-06 09:25.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_138000.d3


Epoch 139/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.35it/s, critic_loss=13, conservative_loss=-0.000184, alpha=3.64e-6, actor_loss=-8.93, temp=0.216, temp_loss=0.00083]   


2025-12-06 09:26.34 [info     ] CQL_20251206073907: epoch=139 step=139000 epoch=139 metrics={'time_sample_batch': 0.005153025150299072, 'time_algorithm_update': 0.036732348442077636, 'critic_loss': 13.048651779651642, 'conservative_loss': -0.000184362813946791, 'alpha': 3.6420760782220896e-06, 'actor_loss': -8.933367293357849, 'temp': 0.21627452473342418, 'temp_loss': 0.000694041533919517, 'time_step': 0.04216875076293945, 'td_error': 6.408641673525382, 'value_scale': 10.127714733736026, 'discounted_advantage': -8.176746630612449, 'initial_state': 12.868295669555664, 'diff_eval': 4214.928575169946} step=139000
2025-12-06 09:26.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_139000.d3


Epoch 140/200: 100%|██████████| 1000/1000 [00:43<00:00, 22.97it/s, critic_loss=13.1, conservative_loss=-0.00017, alpha=3.33e-6, actor_loss=-8.97, temp=0.215, temp_loss=0.00183]


2025-12-06 09:27.21 [info     ] CQL_20251206073907: epoch=140 step=140000 epoch=140 metrics={'time_sample_batch': 0.004957021474838257, 'time_algorithm_update': 0.03757200574874878, 'critic_loss': 13.1114660820961, 'conservative_loss': -0.00016947371252172162, 'alpha': 3.3272508762820505e-06, 'actor_loss': -8.968999882698059, 'temp': 0.2146529219597578, 'temp_loss': 0.0016433353401953356, 'time_step': 0.04282260322570801, 'td_error': 6.145988002370889, 'value_scale': 9.922392843094828, 'discounted_advantage': -8.41998671109223, 'initial_state': 14.410150527954102, 'diff_eval': 3916.71501383189} step=140000
2025-12-06 09:27.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_140000.d3


Epoch 141/200: 100%|██████████| 1000/1000 [00:43<00:00, 23.05it/s, critic_loss=13, conservative_loss=-0.000155, alpha=3.04e-6, actor_loss=-8.95, temp=0.215, temp_loss=-0.00115]  


2025-12-06 09:28.08 [info     ] CQL_20251206073907: epoch=141 step=141000 epoch=141 metrics={'time_sample_batch': 0.005952648401260376, 'time_algorithm_update': 0.03639672589302063, 'critic_loss': 13.033220038890839, 'conservative_loss': -0.00015478754400101023, 'alpha': 3.0387793874524506e-06, 'actor_loss': -8.94783987903595, 'temp': 0.21523625537753105, 'temp_loss': -0.0011929648058721797, 'time_step': 0.04266175413131714, 'td_error': 6.095825201465419, 'value_scale': 9.83343053884158, 'discounted_advantage': -8.25347884913986, 'initial_state': 13.205743789672852, 'diff_eval': 4213.908989955398} step=141000
2025-12-06 09:28.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_141000.d3


Epoch 142/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.99it/s, critic_loss=13.2, conservative_loss=-0.000141, alpha=2.78e-6, actor_loss=-8.98, temp=0.215, temp_loss=0.00152] 


2025-12-06 09:28.53 [info     ] CQL_20251206073907: epoch=142 step=142000 epoch=142 metrics={'time_sample_batch': 0.004964163064956665, 'time_algorithm_update': 0.035750678539276125, 'critic_loss': 13.147133902549744, 'conservative_loss': -0.00014047737989312736, 'alpha': 2.7765166732933722e-06, 'actor_loss': -8.980832619190217, 'temp': 0.21499157434701918, 'temp_loss': 0.0015555719455005602, 'time_step': 0.041008403062820435, 'td_error': 5.9397348944320525, 'value_scale': 10.294686126089456, 'discounted_advantage': -8.19688298199755, 'initial_state': 15.480881690979004, 'diff_eval': 4140.008686342683} step=142000
2025-12-06 09:28.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_142000.d3


Epoch 143/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.32it/s, critic_loss=13.1, conservative_loss=-0.000129, alpha=2.54e-6, actor_loss=-8.95, temp=0.213, temp_loss=0.00141]


2025-12-06 09:29.39 [info     ] CQL_20251206073907: epoch=143 step=143000 epoch=143 metrics={'time_sample_batch': 0.005013713359832764, 'time_algorithm_update': 0.036865690231323245, 'critic_loss': 13.075197742462159, 'conservative_loss': -0.00012852854141237914, 'alpha': 2.5368773426635016e-06, 'actor_loss': -8.951769423484802, 'temp': 0.21263098706305028, 'temp_loss': 0.0015419426385778934, 'time_step': 0.04216869688034058, 'td_error': 6.151638432980126, 'value_scale': 10.036224070205748, 'discounted_advantage': -8.377714966117283, 'initial_state': 13.026479721069336, 'diff_eval': 4263.442489162744} step=143000
2025-12-06 09:29.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_143000.d3


Epoch 144/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.62it/s, critic_loss=13.1, conservative_loss=-0.000117, alpha=2.32e-6, actor_loss=-8.96, temp=0.211, temp_loss=0.00157]


2025-12-06 09:30.25 [info     ] CQL_20251206073907: epoch=144 step=144000 epoch=144 metrics={'time_sample_batch': 0.005032082557678222, 'time_algorithm_update': 0.036339864015579225, 'critic_loss': 13.128567431926728, 'conservative_loss': -0.00011646145660051843, 'alpha': 2.31853048467201e-06, 'actor_loss': -8.96092024230957, 'temp': 0.21127336136996747, 'temp_loss': 0.0016431305520236493, 'time_step': 0.04165502190589905, 'td_error': 5.912952194586033, 'value_scale': 9.839612608057292, 'discounted_advantage': -8.0644937168067, 'initial_state': 13.5121488571167, 'diff_eval': 4038.5688877941548} step=144000
2025-12-06 09:30.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_144000.d3


Epoch 145/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.54it/s, critic_loss=13, conservative_loss=-0.000106, alpha=2.12e-6, actor_loss=-9, temp=0.211, temp_loss=-0.00077]    


2025-12-06 09:31.09 [info     ] CQL_20251206073907: epoch=145 step=145000 epoch=145 metrics={'time_sample_batch': 0.004715425252914429, 'time_algorithm_update': 0.03506616234779358, 'critic_loss': 13.032750713825227, 'conservative_loss': -0.00010582981970219407, 'alpha': 2.1190519655647223e-06, 'actor_loss': -8.999243386268615, 'temp': 0.2105646264255047, 'temp_loss': -0.000773525980883278, 'time_step': 0.04005518865585327, 'td_error': 6.0701182607956, 'value_scale': 9.967614185383539, 'discounted_advantage': -8.047857517025912, 'initial_state': 13.578693389892578, 'diff_eval': 4007.599574217208} step=145000
2025-12-06 09:31.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_145000.d3


Epoch 146/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.86it/s, critic_loss=13, conservative_loss=-9.73e-5, alpha=1.94e-6, actor_loss=-8.92, temp=0.211, temp_loss=0.000293]   


2025-12-06 09:31.55 [info     ] CQL_20251206073907: epoch=146 step=146000 epoch=146 metrics={'time_sample_batch': 0.004793609619140625, 'time_algorithm_update': 0.03607049584388733, 'critic_loss': 12.971930673599243, 'conservative_loss': -9.722180483367992e-05, 'alpha': 1.936065479185345e-06, 'actor_loss': -8.919050746917724, 'temp': 0.21092347840964795, 'temp_loss': 0.0001374363648938015, 'time_step': 0.041171269416809084, 'td_error': 6.059745661818386, 'value_scale': 11.139759512016298, 'discounted_advantage': -8.556201767289888, 'initial_state': 16.160341262817383, 'diff_eval': 3760.322148317285} step=146000
2025-12-06 09:31.55 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_146000.d3


Epoch 147/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.87it/s, critic_loss=12.8, conservative_loss=-8.94e-5, alpha=1.77e-6, actor_loss=-8.89, temp=0.211, temp_loss=-0.00116]


2025-12-06 09:32.40 [info     ] CQL_20251206073907: epoch=147 step=147000 epoch=147 metrics={'time_sample_batch': 0.005012853145599365, 'time_algorithm_update': 0.03590295314788818, 'critic_loss': 12.834713751316071, 'conservative_loss': -8.933234427968273e-05, 'alpha': 1.768289597407602e-06, 'actor_loss': -8.886703111171723, 'temp': 0.21137343108654022, 'temp_loss': -0.0011739933422068134, 'time_step': 0.04120023894309997, 'td_error': 6.013380533317066, 'value_scale': 10.056039399272244, 'discounted_advantage': -8.160699049654061, 'initial_state': 13.322415351867676, 'diff_eval': 3823.669698566} step=147000
2025-12-06 09:32.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_147000.d3


Epoch 148/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.58it/s, critic_loss=12.8, conservative_loss=-8.16e-5, alpha=1.62e-6, actor_loss=-8.84, temp=0.211, temp_loss=0.00104] 


2025-12-06 09:33.26 [info     ] CQL_20251206073907: epoch=148 step=148000 epoch=148 metrics={'time_sample_batch': 0.005089027643203735, 'time_algorithm_update': 0.03635219120979309, 'critic_loss': 12.812353050470351, 'conservative_loss': -8.155440812697635e-05, 'alpha': 1.6150074435472561e-06, 'actor_loss': -8.83848284626007, 'temp': 0.21128009489178656, 'temp_loss': 0.0011347104958258569, 'time_step': 0.04172792172431946, 'td_error': 6.1577666511577265, 'value_scale': 9.845654956406264, 'discounted_advantage': -8.116898299106094, 'initial_state': 12.953405380249023, 'diff_eval': 3931.650246063449} step=148000
2025-12-06 09:33.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_148000.d3


Epoch 149/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.67it/s, critic_loss=12.6, conservative_loss=-7.45e-5, alpha=1.48e-6, actor_loss=-8.84, temp=0.21, temp_loss=0.00201]  


2025-12-06 09:34.12 [info     ] CQL_20251206073907: epoch=149 step=149000 epoch=149 metrics={'time_sample_batch': 0.005002109289169311, 'time_algorithm_update': 0.03627201581001282, 'critic_loss': 12.58274209356308, 'conservative_loss': -7.445118357645697e-05, 'alpha': 1.4751251646885066e-06, 'actor_loss': -8.843388411998749, 'temp': 0.20963742965459822, 'temp_loss': 0.001960450884886086, 'time_step': 0.04155751371383667, 'td_error': 6.084947917954635, 'value_scale': 10.710211685320795, 'discounted_advantage': -7.819045916936132, 'initial_state': 15.240996360778809, 'diff_eval': 4537.466990113883} step=149000
2025-12-06 09:34.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_149000.d3


Epoch 150/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.73it/s, critic_loss=12.7, conservative_loss=-6.77e-5, alpha=1.35e-6, actor_loss=-8.87, temp=0.207, temp_loss=0.000991]


2025-12-06 09:34.57 [info     ] CQL_20251206073907: epoch=150 step=150000 epoch=150 metrics={'time_sample_batch': 0.004973981380462646, 'time_algorithm_update': 0.03622867250442505, 'critic_loss': 12.66830160999298, 'conservative_loss': -6.771161975848373e-05, 'alpha': 1.3475448820372548e-06, 'actor_loss': -8.873065244197845, 'temp': 0.2074956334531307, 'temp_loss': 0.0010550617331755348, 'time_step': 0.04148018336296082, 'td_error': 6.211041709305982, 'value_scale': 10.202322469786909, 'discounted_advantage': -8.132442537273194, 'initial_state': 14.053885459899902, 'diff_eval': 4493.335547122668} step=150000
2025-12-06 09:34.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_150000.d3


Epoch 151/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.26it/s, critic_loss=12.9, conservative_loss=-6.18e-5, alpha=1.23e-6, actor_loss=-8.86, temp=0.208, temp_loss=-0.0013] 


2025-12-06 09:35.44 [info     ] CQL_20251206073907: epoch=151 step=151000 epoch=151 metrics={'time_sample_batch': 0.005089513063430786, 'time_algorithm_update': 0.036845514059066775, 'critic_loss': 12.90493461561203, 'conservative_loss': -6.171933495716076e-05, 'alpha': 1.2314761431753142e-06, 'actor_loss': -8.858188957214356, 'temp': 0.20772936891019345, 'temp_loss': -0.0012248484338633716, 'time_step': 0.042231752634048464, 'td_error': 6.272101316959306, 'value_scale': 10.149622856961736, 'discounted_advantage': -8.06829102458294, 'initial_state': 13.611554145812988, 'diff_eval': 4548.964037750111} step=151000
2025-12-06 09:35.44 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_151000.d3


Epoch 152/200: 100%|██████████| 1000/1000 [00:43<00:00, 22.88it/s, critic_loss=12.7, conservative_loss=-5.6e-5, alpha=1.13e-6, actor_loss=-8.88, temp=0.207, temp_loss=0.00301] 


2025-12-06 09:36.31 [info     ] CQL_20251206073907: epoch=152 step=152000 epoch=152 metrics={'time_sample_batch': 0.0066307632923126225, 'time_algorithm_update': 0.036052775144577026, 'critic_loss': 12.683299218177796, 'conservative_loss': -5.598452985213953e-05, 'alpha': 1.1254991303530915e-06, 'actor_loss': -8.885031013488769, 'temp': 0.20666144342720508, 'temp_loss': 0.00308929560193792, 'time_step': 0.04299215412139892, 'td_error': 6.071094432751371, 'value_scale': 9.883363425819093, 'discounted_advantage': -8.06697187343816, 'initial_state': 15.7652006149292, 'diff_eval': 4424.419825636171} step=152000
2025-12-06 09:36.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_152000.d3


Epoch 153/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.83it/s, critic_loss=12.7, conservative_loss=-5.15e-5, alpha=1.03e-6, actor_loss=-8.99, temp=0.205, temp_loss=-0.000296]


2025-12-06 09:37.18 [info     ] CQL_20251206073907: epoch=153 step=153000 epoch=153 metrics={'time_sample_batch': 0.004951950311660767, 'time_algorithm_update': 0.036047024726867674, 'critic_loss': 12.721163460731507, 'conservative_loss': -5.146278990287101e-05, 'alpha': 1.028383411608047e-06, 'actor_loss': -8.987123731136322, 'temp': 0.20534180833399296, 'temp_loss': -0.00036488668876700104, 'time_step': 0.04127765059471131, 'td_error': 6.316035409870037, 'value_scale': 10.32165332508257, 'discounted_advantage': -7.83855048338502, 'initial_state': 13.283251762390137, 'diff_eval': 4293.184039065093} step=153000
2025-12-06 09:37.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_153000.d3


Epoch 154/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.34it/s, critic_loss=12.9, conservative_loss=-4.72e-5, alpha=9.4e-7, actor_loss=-9, temp=0.205, temp_loss=0.000801]    


2025-12-06 09:38.04 [info     ] CQL_20251206073907: epoch=154 step=154000 epoch=154 metrics={'time_sample_batch': 0.005136882543563843, 'time_algorithm_update': 0.036724463939666746, 'critic_loss': 12.853868734836578, 'conservative_loss': -4.7180758949252774e-05, 'alpha': 9.391601655011073e-07, 'actor_loss': -8.997638525485993, 'temp': 0.20456934410333633, 'temp_loss': 0.0008326789354905486, 'time_step': 0.04215219855308533, 'td_error': 5.8804239575392945, 'value_scale': 9.963511504816815, 'discounted_advantage': -8.116532196052853, 'initial_state': 13.796965599060059, 'diff_eval': 3970.834098580277} step=154000
2025-12-06 09:38.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_154000.d3


Epoch 155/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.89it/s, critic_loss=12.9, conservative_loss=-4.31e-5, alpha=8.58e-7, actor_loss=-9.06, temp=0.205, temp_loss=-0.00118]


2025-12-06 09:38.49 [info     ] CQL_20251206073907: epoch=155 step=155000 epoch=155 metrics={'time_sample_batch': 0.004886231660842896, 'time_algorithm_update': 0.036039772272109985, 'critic_loss': 12.87165664958954, 'conservative_loss': -4.308628832586692e-05, 'alpha': 8.579557625694178e-07, 'actor_loss': -9.061424996376038, 'temp': 0.20481942199170589, 'temp_loss': -0.0011247537934686988, 'time_step': 0.04120474767684937, 'td_error': 6.132692708086117, 'value_scale': 10.409517439108027, 'discounted_advantage': -8.230545183698782, 'initial_state': 13.914628982543945, 'diff_eval': 3924.723373390328} step=155000
2025-12-06 09:38.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_155000.d3


Epoch 156/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.77it/s, critic_loss=12.8, conservative_loss=-3.9e-5, alpha=7.84e-7, actor_loss=-9.14, temp=0.205, temp_loss=0.00123]  


2025-12-06 09:39.35 [info     ] CQL_20251206073907: epoch=156 step=156000 epoch=156 metrics={'time_sample_batch': 0.004982476472854614, 'time_algorithm_update': 0.036161478042602536, 'critic_loss': 12.850346779823303, 'conservative_loss': -3.901727889024187e-05, 'alpha': 7.838208929911161e-07, 'actor_loss': -9.1393330245018, 'temp': 0.20531493562459946, 'temp_loss': 0.0010468226781813429, 'time_step': 0.04141752696037292, 'td_error': 6.3915779368057954, 'value_scale': 10.638097212065171, 'discounted_advantage': -8.410864579191621, 'initial_state': 13.415815353393555, 'diff_eval': 4296.994347455115} step=156000
2025-12-06 09:39.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_156000.d3


Epoch 157/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.66it/s, critic_loss=12.7, conservative_loss=-3.54e-5, alpha=7.17e-7, actor_loss=-9.3, temp=0.205, temp_loss=-0.00273] 


2025-12-06 09:40.21 [info     ] CQL_20251206073907: epoch=157 step=157000 epoch=157 metrics={'time_sample_batch': 0.005051607847213745, 'time_algorithm_update': 0.036306637525558474, 'critic_loss': 12.721600467205048, 'conservative_loss': -3.5418003273662176e-05, 'alpha': 7.166119333987808e-07, 'actor_loss': -9.306277126312256, 'temp': 0.20461271032691003, 'temp_loss': -0.002923570732586086, 'time_step': 0.04163602209091186, 'td_error': 5.771949307215675, 'value_scale': 9.697375413706434, 'discounted_advantage': -8.217099906298614, 'initial_state': 13.87939739227295, 'diff_eval': 3867.620230637737} step=157000
2025-12-06 09:40.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_157000.d3


Epoch 158/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.39it/s, critic_loss=13.2, conservative_loss=-3.22e-5, alpha=6.55e-7, actor_loss=-9.47, temp=0.207, temp_loss=0.00178] 


2025-12-06 09:41.07 [info     ] CQL_20251206073907: epoch=158 step=158000 epoch=158 metrics={'time_sample_batch': 0.005131466388702393, 'time_algorithm_update': 0.03666629838943482, 'critic_loss': 13.217280609607696, 'conservative_loss': -3.218964434927329e-05, 'alpha': 6.549232126076277e-07, 'actor_loss': -9.466299661636352, 'temp': 0.2068062347471714, 'temp_loss': 0.0017668093288084493, 'time_step': 0.04208317255973816, 'td_error': 6.3262084530506515, 'value_scale': 10.58529406140987, 'discounted_advantage': -8.177226573050524, 'initial_state': 14.116690635681152, 'diff_eval': 4239.2142016618345} step=158000
2025-12-06 09:41.07 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_158000.d3


Epoch 159/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.57it/s, critic_loss=13.4, conservative_loss=-2.92e-5, alpha=5.99e-7, actor_loss=-9.67, temp=0.207, temp_loss=-0.00145]


2025-12-06 09:41.53 [info     ] CQL_20251206073907: epoch=159 step=159000 epoch=159 metrics={'time_sample_batch': 0.00503947901725769, 'time_algorithm_update': 0.0364468834400177, 'critic_loss': 13.351883763313294, 'conservative_loss': -2.91508136815537e-05, 'alpha': 5.987647975302934e-07, 'actor_loss': -9.667753704071044, 'temp': 0.20687789097428322, 'temp_loss': -0.001362119274563156, 'time_step': 0.041766719341278076, 'td_error': 5.8768926923605855, 'value_scale': 10.669117819371088, 'discounted_advantage': -8.377282169175192, 'initial_state': 16.299205780029297, 'diff_eval': 4356.2373242411895} step=159000
2025-12-06 09:41.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_159000.d3


Epoch 160/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.58it/s, critic_loss=13.6, conservative_loss=-2.67e-5, alpha=5.48e-7, actor_loss=-9.89, temp=0.207, temp_loss=-0.000678]


2025-12-06 09:42.39 [info     ] CQL_20251206073907: epoch=160 step=160000 epoch=160 metrics={'time_sample_batch': 0.00508031702041626, 'time_algorithm_update': 0.03637106347084045, 'critic_loss': 13.672037321090698, 'conservative_loss': -2.671098145765427e-05, 'alpha': 5.473006071952114e-07, 'actor_loss': -9.888099829673767, 'temp': 0.20696964572370052, 'temp_loss': -0.0006474364643217996, 'time_step': 0.04174608945846558, 'td_error': 6.237133815480869, 'value_scale': 10.384690321168463, 'discounted_advantage': -8.544466951833135, 'initial_state': 13.924426078796387, 'diff_eval': 3936.8828881884624} step=160000
2025-12-06 09:42.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_160000.d3


Epoch 161/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.66it/s, critic_loss=13.7, conservative_loss=-2.44e-5, alpha=5e-7, actor_loss=-10.1, temp=0.207, temp_loss=0.00143]   


2025-12-06 09:43.25 [info     ] CQL_20251206073907: epoch=161 step=161000 epoch=161 metrics={'time_sample_batch': 0.00503303074836731, 'time_algorithm_update': 0.03628752827644348, 'critic_loss': 13.696839143514634, 'conservative_loss': -2.4414548792265124e-05, 'alpha': 5.000666901651129e-07, 'actor_loss': -10.101982390403748, 'temp': 0.20739032125473023, 'temp_loss': 0.0012327311232220382, 'time_step': 0.04160735750198364, 'td_error': 6.430276523677098, 'value_scale': 10.939700823725332, 'discounted_advantage': -9.03679585521828, 'initial_state': 14.506433486938477, 'diff_eval': 4125.265781532364} step=161000
2025-12-06 09:43.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_161000.d3


Epoch 162/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.16it/s, critic_loss=14, conservative_loss=-2.19e-5, alpha=4.57e-7, actor_loss=-10.4, temp=0.206, temp_loss=0.00152]   


2025-12-06 09:44.09 [info     ] CQL_20251206073907: epoch=162 step=162000 epoch=162 metrics={'time_sample_batch': 0.004913284540176392, 'time_algorithm_update': 0.03557888221740722, 'critic_loss': 14.039508605718613, 'conservative_loss': -2.1934773181783386e-05, 'alpha': 4.5710314361713243e-07, 'actor_loss': -10.358319964408874, 'temp': 0.20609024932980538, 'temp_loss': 0.0014375810304190963, 'time_step': 0.04075997948646545, 'td_error': 6.088096617324691, 'value_scale': 10.847620329754044, 'discounted_advantage': -9.365020189517395, 'initial_state': 15.429354667663574, 'diff_eval': 4032.9258997593206} step=162000
2025-12-06 09:44.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_162000.d3


Epoch 163/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.61it/s, critic_loss=14.5, conservative_loss=-2e-5, alpha=4.18e-7, actor_loss=-10.6, temp=0.206, temp_loss=0.000122]   


2025-12-06 09:44.55 [info     ] CQL_20251206073907: epoch=163 step=163000 epoch=163 metrics={'time_sample_batch': 0.005061911106109619, 'time_algorithm_update': 0.036353970527648924, 'critic_loss': 14.490369338989257, 'conservative_loss': -1.9951965601649134e-05, 'alpha': 4.1800350280141176e-07, 'actor_loss': -10.573947591781616, 'temp': 0.20556944359838963, 'temp_loss': 0.0002729228506796062, 'time_step': 0.04169435667991638, 'td_error': 6.697084173748392, 'value_scale': 11.36471280777844, 'discounted_advantage': -9.026099885716748, 'initial_state': 14.440448760986328, 'diff_eval': 4079.8519675288685} step=163000
2025-12-06 09:44.55 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_163000.d3


Epoch 164/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.68it/s, critic_loss=14.7, conservative_loss=-1.82e-5, alpha=3.82e-7, actor_loss=-10.8, temp=0.204, temp_loss=-0.000457]


2025-12-06 09:45.41 [info     ] CQL_20251206073907: epoch=164 step=164000 epoch=164 metrics={'time_sample_batch': 0.005019166469573975, 'time_algorithm_update': 0.03629104518890381, 'critic_loss': 14.738756860256196, 'conservative_loss': -1.8159914635361928e-05, 'alpha': 3.8209287654922263e-07, 'actor_loss': -10.78291028881073, 'temp': 0.20428866612911226, 'temp_loss': -0.00040966221515554934, 'time_step': 0.04157860398292541, 'td_error': 6.400605893647505, 'value_scale': 11.541979368075054, 'discounted_advantage': -9.497578824940758, 'initial_state': 16.015254974365234, 'diff_eval': 4286.0330705705655} step=164000
2025-12-06 09:45.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_164000.d3


Epoch 165/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.45it/s, critic_loss=15, conservative_loss=-1.65e-5, alpha=3.49e-7, actor_loss=-10.9, temp=0.207, temp_loss=-0.00266] 


2025-12-06 09:46.27 [info     ] CQL_20251206073907: epoch=165 step=165000 epoch=165 metrics={'time_sample_batch': 0.005043079853057861, 'time_algorithm_update': 0.036662524700164796, 'critic_loss': 15.026290540218353, 'conservative_loss': -1.6479901103593875e-05, 'alpha': 3.493192648420518e-07, 'actor_loss': -10.934084259986877, 'temp': 0.20659195068478584, 'temp_loss': -0.002795416134293191, 'time_step': 0.041984252214431764, 'td_error': 6.510277522160876, 'value_scale': 11.534313639566577, 'discounted_advantage': -9.833834591412288, 'initial_state': 16.428157806396484, 'diff_eval': 4529.011482029502} step=165000
2025-12-06 09:46.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_165000.d3


Epoch 166/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.40it/s, critic_loss=15.5, conservative_loss=-1.49e-5, alpha=3.2e-7, actor_loss=-11.1, temp=0.21, temp_loss=-0.00191]  


2025-12-06 09:47.13 [info     ] CQL_20251206073907: epoch=166 step=166000 epoch=166 metrics={'time_sample_batch': 0.005160749673843384, 'time_algorithm_update': 0.0366292188167572, 'critic_loss': 15.475799676418305, 'conservative_loss': -1.490142710008513e-05, 'alpha': 3.1942834792175743e-07, 'actor_loss': -11.124425515174865, 'temp': 0.20962546662986278, 'temp_loss': -0.0019870477030053736, 'time_step': 0.0420778694152832, 'td_error': 6.3862182856494005, 'value_scale': 11.362743154408571, 'discounted_advantage': -9.664711170675407, 'initial_state': 16.844240188598633, 'diff_eval': 4259.515950563204} step=166000
2025-12-06 09:47.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_166000.d3


Epoch 167/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.83it/s, critic_loss=16, conservative_loss=-1.35e-5, alpha=2.92e-7, actor_loss=-11.3, temp=0.21, temp_loss=0.00026]   


2025-12-06 09:47.59 [info     ] CQL_20251206073907: epoch=167 step=167000 epoch=167 metrics={'time_sample_batch': 0.004954099178314209, 'time_algorithm_update': 0.036056476593017577, 'critic_loss': 15.986930975437165, 'conservative_loss': -1.3447707508021267e-05, 'alpha': 2.9214584145620393e-07, 'actor_loss': -11.275893740653991, 'temp': 0.20996828162670136, 'temp_loss': 0.000249651919468306, 'time_step': 0.04130047631263733, 'td_error': 6.717888052316884, 'value_scale': 11.665048104146463, 'discounted_advantage': -10.288691466341838, 'initial_state': 16.50098419189453, 'diff_eval': 4220.394387201908} step=167000
2025-12-06 09:47.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_167000.d3


Epoch 168/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.63it/s, critic_loss=16.1, conservative_loss=-1.23e-5, alpha=2.67e-7, actor_loss=-11.4, temp=0.21, temp_loss=0.000291]


2025-12-06 09:48.45 [info     ] CQL_20251206073907: epoch=168 step=168000 epoch=168 metrics={'time_sample_batch': 0.00500379753112793, 'time_algorithm_update': 0.03636657667160034, 'critic_loss': 16.115476650953294, 'conservative_loss': -1.2282130985113327e-05, 'alpha': 2.6718021155147654e-07, 'actor_loss': -11.382111075401307, 'temp': 0.20958331488072873, 'temp_loss': 0.0002834318978711963, 'time_step': 0.04165746521949768, 'td_error': 6.591847311273865, 'value_scale': 11.950544910216571, 'discounted_advantage': -9.93712895394003, 'initial_state': 17.59598159790039, 'diff_eval': 4486.053308192785} step=168000
2025-12-06 09:48.45 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_168000.d3


Epoch 169/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.45it/s, critic_loss=16.5, conservative_loss=-1.13e-5, alpha=2.44e-7, actor_loss=-11.5, temp=0.212, temp_loss=-0.00291]


2025-12-06 09:49.31 [info     ] CQL_20251206073907: epoch=169 step=169000 epoch=169 metrics={'time_sample_batch': 0.005104854583740235, 'time_algorithm_update': 0.03652300667762756, 'critic_loss': 16.461690644741058, 'conservative_loss': -1.1250087568441814e-05, 'alpha': 2.441187928496902e-07, 'actor_loss': -11.513860684394837, 'temp': 0.21191213127970696, 'temp_loss': -0.002878491563606076, 'time_step': 0.04192924976348877, 'td_error': 7.501843933473192, 'value_scale': 12.248025435802885, 'discounted_advantage': -10.39675320716559, 'initial_state': 15.766427040100098, 'diff_eval': 4213.504513363409} step=169000
2025-12-06 09:49.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_169000.d3


Epoch 170/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.78it/s, critic_loss=16.9, conservative_loss=-1.02e-5, alpha=2.23e-7, actor_loss=-11.6, temp=0.214, temp_loss=-0.00229]


2025-12-06 09:50.16 [info     ] CQL_20251206073907: epoch=170 step=170000 epoch=170 metrics={'time_sample_batch': 0.005003345251083374, 'time_algorithm_update': 0.03610383129119873, 'critic_loss': 16.916493014335632, 'conservative_loss': -1.0223995995147561e-05, 'alpha': 2.2312163142146347e-07, 'actor_loss': -11.625410241127014, 'temp': 0.21421584223210813, 'temp_loss': -0.0023285554908798077, 'time_step': 0.041389265060424806, 'td_error': 6.90647286752528, 'value_scale': 12.175284751299815, 'discounted_advantage': -10.504571362779464, 'initial_state': 19.107650756835938, 'diff_eval': 4186.151836191961} step=170000
2025-12-06 09:50.17 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_170000.d3


Epoch 171/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.85it/s, critic_loss=17.2, conservative_loss=-9.13e-6, alpha=2.04e-7, actor_loss=-11.7, temp=0.216, temp_loss=0.000102]


2025-12-06 09:51.02 [info     ] CQL_20251206073907: epoch=171 step=171000 epoch=171 metrics={'time_sample_batch': 0.004981855869293213, 'time_algorithm_update': 0.03597226929664612, 'critic_loss': 17.192441703796387, 'conservative_loss': -9.130453838679386e-06, 'alpha': 2.0411285274235525e-07, 'actor_loss': -11.669091141700745, 'temp': 0.21622720333933831, 'temp_loss': 4.2901150649413464e-05, 'time_step': 0.04124332427978516, 'td_error': 7.206967964020241, 'value_scale': 12.260123814519398, 'discounted_advantage': -10.824341447790081, 'initial_state': 17.454404830932617, 'diff_eval': 4151.1350854259745} step=171000
2025-12-06 09:51.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_171000.d3


Epoch 172/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.54it/s, critic_loss=17.5, conservative_loss=-8.32e-6, alpha=1.87e-7, actor_loss=-11.8, temp=0.217, temp_loss=-0.00231]


2025-12-06 09:51.48 [info     ] CQL_20251206073907: epoch=172 step=172000 epoch=172 metrics={'time_sample_batch': 0.005044370174407959, 'time_algorithm_update': 0.036472113847732544, 'critic_loss': 17.564463841438293, 'conservative_loss': -8.318812462221104e-06, 'alpha': 1.8672247536244413e-07, 'actor_loss': -11.762237830162048, 'temp': 0.21695252868533135, 'temp_loss': -0.0023123258017003537, 'time_step': 0.04179633641242981, 'td_error': 7.459459616377224, 'value_scale': 12.599614106993014, 'discounted_advantage': -10.62214239615845, 'initial_state': 17.01942253112793, 'diff_eval': 4267.873488035668} step=172000
2025-12-06 09:51.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_172000.d3


Epoch 173/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.59it/s, critic_loss=17.9, conservative_loss=-7.6e-6, alpha=1.71e-7, actor_loss=-11.9, temp=0.22, temp_loss=-0.000887] 


2025-12-06 09:52.34 [info     ] CQL_20251206073907: epoch=173 step=173000 epoch=173 metrics={'time_sample_batch': 0.005026433706283569, 'time_algorithm_update': 0.036384316682815554, 'critic_loss': 17.857544269561767, 'conservative_loss': -7.596997701966757e-06, 'alpha': 1.7070855398060302e-07, 'actor_loss': -11.878538115501403, 'temp': 0.21981152822077274, 'temp_loss': -0.0009683875039918348, 'time_step': 0.04170278716087341, 'td_error': 7.844393444484336, 'value_scale': 12.60673066470007, 'discounted_advantage': -11.403366181681779, 'initial_state': 15.870838165283203, 'diff_eval': 4266.687802754606} step=173000
2025-12-06 09:52.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_173000.d3


Epoch 174/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.78it/s, critic_loss=17.7, conservative_loss=-6.94e-6, alpha=1.56e-7, actor_loss=-11.9, temp=0.22, temp_loss=-0.00133] 


2025-12-06 09:53.19 [info     ] CQL_20251206073907: epoch=174 step=174000 epoch=174 metrics={'time_sample_batch': 0.00502212929725647, 'time_algorithm_update': 0.036082178831100466, 'critic_loss': 17.73156102991104, 'conservative_loss': -6.935066985533922e-06, 'alpha': 1.560636074202648e-07, 'actor_loss': -11.920666538238525, 'temp': 0.22032316817343234, 'temp_loss': -0.0013562107088509947, 'time_step': 0.041403164863586424, 'td_error': 8.501479094862031, 'value_scale': 12.912349396334509, 'discounted_advantage': -11.674346137149273, 'initial_state': 15.363102912902832, 'diff_eval': 5056.139563898919} step=174000
2025-12-06 09:53.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_174000.d3


Epoch 175/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.49it/s, critic_loss=17.9, conservative_loss=-6.25e-6, alpha=1.43e-7, actor_loss=-12, temp=0.223, temp_loss=-0.000546] 


2025-12-06 09:54.05 [info     ] CQL_20251206073907: epoch=175 step=175000 epoch=175 metrics={'time_sample_batch': 0.005197438478469848, 'time_algorithm_update': 0.036402374505996704, 'critic_loss': 17.898132420063018, 'conservative_loss': -6.246646540603251e-06, 'alpha': 1.42694269058552e-07, 'actor_loss': -12.024001554489136, 'temp': 0.22274013885855676, 'temp_loss': -0.0007849990983959288, 'time_step': 0.04190046119689941, 'td_error': 7.68016426054748, 'value_scale': 13.389269523370983, 'discounted_advantage': -12.136036068730329, 'initial_state': 17.986833572387695, 'diff_eval': 3951.8277618629545} step=175000
2025-12-06 09:54.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_175000.d3


Epoch 176/200: 100%|██████████| 1000/1000 [00:43<00:00, 23.19it/s, critic_loss=18.2, conservative_loss=-5.73e-6, alpha=1.31e-7, actor_loss=-12.1, temp=0.225, temp_loss=-0.00317]


2025-12-06 09:54.52 [info     ] CQL_20251206073907: epoch=176 step=176000 epoch=176 metrics={'time_sample_batch': 0.005188433408737182, 'time_algorithm_update': 0.036947524785995486, 'critic_loss': 18.22732959508896, 'conservative_loss': -5.7332595520165346e-06, 'alpha': 1.3046656849269312e-07, 'actor_loss': -12.122912796020508, 'temp': 0.2253673898726702, 'temp_loss': -0.0031647396585904063, 'time_step': 0.04242629528045654, 'td_error': 8.096202937668409, 'value_scale': 13.51624400498094, 'discounted_advantage': -11.83452692297984, 'initial_state': 17.535564422607422, 'diff_eval': 4511.440626026762} step=176000
2025-12-06 09:54.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_176000.d3


Epoch 177/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.75it/s, critic_loss=18.5, conservative_loss=-5.22e-6, alpha=1.19e-7, actor_loss=-12.2, temp=0.226, temp_loss=-0.000258]


2025-12-06 09:55.38 [info     ] CQL_20251206073907: epoch=177 step=177000 epoch=177 metrics={'time_sample_batch': 0.004971179962158203, 'time_algorithm_update': 0.03611605381965637, 'critic_loss': 18.485804355144502, 'conservative_loss': -5.219605476668221e-06, 'alpha': 1.192595147188058e-07, 'actor_loss': -12.176823135375976, 'temp': 0.22649557231366635, 'temp_loss': -0.00024388266436289996, 'time_step': 0.041403223991394046, 'td_error': 8.136439159049152, 'value_scale': 13.969515181956103, 'discounted_advantage': -11.968101748219139, 'initial_state': 18.90234375, 'diff_eval': 4307.368352974642} step=177000
2025-12-06 09:55.38 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_177000.d3


Epoch 178/200: 100%|██████████| 1000/1000 [00:43<00:00, 23.05it/s, critic_loss=18.6, conservative_loss=-4.69e-6, alpha=1.09e-7, actor_loss=-12.2, temp=0.227, temp_loss=0.00113] 


2025-12-06 09:56.25 [info     ] CQL_20251206073907: epoch=178 step=178000 epoch=178 metrics={'time_sample_batch': 0.005631513118743896, 'time_algorithm_update': 0.03677936673164368, 'critic_loss': 18.58187476968765, 'conservative_loss': -4.690302165272442e-06, 'alpha': 1.0906354077633296e-07, 'actor_loss': -12.21291000366211, 'temp': 0.22680906203389167, 'temp_loss': 0.001173237082315609, 'time_step': 0.04269646739959717, 'td_error': 8.346996912830631, 'value_scale': 13.816609427101804, 'discounted_advantage': -12.477188047485765, 'initial_state': 17.455257415771484, 'diff_eval': 4652.614094868077} step=178000
2025-12-06 09:56.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_178000.d3


Epoch 179/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.50it/s, critic_loss=18.7, conservative_loss=-4.27e-6, alpha=9.98e-8, actor_loss=-12.2, temp=0.225, temp_loss=0.000797]


2025-12-06 09:57.11 [info     ] CQL_20251206073907: epoch=179 step=179000 epoch=179 metrics={'time_sample_batch': 0.005114454507827759, 'time_algorithm_update': 0.03644496726989746, 'critic_loss': 18.716606933116914, 'conservative_loss': -4.267496719421615e-06, 'alpha': 9.979956082872831e-08, 'actor_loss': -12.192685551643372, 'temp': 0.22497808976471423, 'temp_loss': 0.0007360510344151407, 'time_step': 0.041869263887405396, 'td_error': 7.995852328571626, 'value_scale': 13.805047881816828, 'discounted_advantage': -11.84141656118299, 'initial_state': 17.389156341552734, 'diff_eval': 4281.283130931953} step=179000
2025-12-06 09:57.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_179000.d3


Epoch 180/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.54it/s, critic_loss=18.5, conservative_loss=-3.89e-6, alpha=9.13e-8, actor_loss=-12.2, temp=0.226, temp_loss=-0.00249]


2025-12-06 09:57.57 [info     ] CQL_20251206073907: epoch=180 step=180000 epoch=180 metrics={'time_sample_batch': 0.004917590141296386, 'time_algorithm_update': 0.036564647674560545, 'critic_loss': 18.531057326793672, 'conservative_loss': -3.894050222470469e-06, 'alpha': 9.127575697220891e-08, 'actor_loss': -12.240828198432922, 'temp': 0.226222362190485, 'temp_loss': -0.0025359198226360606, 'time_step': 0.041771175384521485, 'td_error': 8.105044609097204, 'value_scale': 14.683424971804135, 'discounted_advantage': -12.3055161376319, 'initial_state': 19.370346069335938, 'diff_eval': 4504.087723355152} step=180000
2025-12-06 09:57.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_180000.d3


Epoch 181/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.70it/s, critic_loss=18.9, conservative_loss=-3.52e-6, alpha=8.35e-8, actor_loss=-12.3, temp=0.228, temp_loss=-0.00127]


2025-12-06 09:58.42 [info     ] CQL_20251206073907: epoch=181 step=181000 epoch=181 metrics={'time_sample_batch': 0.00493321442604065, 'time_algorithm_update': 0.0363089919090271, 'critic_loss': 18.915690670490264, 'conservative_loss': -3.516449380185804e-06, 'alpha': 8.347072490977326e-08, 'actor_loss': -12.279074451446533, 'temp': 0.22761473621428013, 'temp_loss': -0.0013052095369203017, 'time_step': 0.04151189708709717, 'td_error': 8.807002224623513, 'value_scale': 13.722072565928102, 'discounted_advantage': -12.62357873009321, 'initial_state': 17.221843719482422, 'diff_eval': 4448.769728774831} step=181000
2025-12-06 09:58.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_181000.d3


Epoch 182/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.90it/s, critic_loss=18.8, conservative_loss=-3.22e-6, alpha=7.64e-8, actor_loss=-12.3, temp=0.23, temp_loss=-0.0017] 


2025-12-06 09:59.28 [info     ] CQL_20251206073907: epoch=182 step=182000 epoch=182 metrics={'time_sample_batch': 0.004730043649673462, 'time_algorithm_update': 0.036134890556335446, 'critic_loss': 18.839937695503234, 'conservative_loss': -3.2188294092065916e-06, 'alpha': 7.634789679400455e-08, 'actor_loss': -12.288830586433411, 'temp': 0.2302096066325903, 'temp_loss': -0.0016856722198426723, 'time_step': 0.041152361154556276, 'td_error': 8.865959610182236, 'value_scale': 13.998426932621394, 'discounted_advantage': -12.538529125061993, 'initial_state': 17.202878952026367, 'diff_eval': 3999.7651097076523} step=182000
2025-12-06 09:59.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_182000.d3


Epoch 183/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.95it/s, critic_loss=19.2, conservative_loss=-2.92e-6, alpha=6.99e-8, actor_loss=-12.4, temp=0.232, temp_loss=-0.00211]


2025-12-06 10:00.13 [info     ] CQL_20251206073907: epoch=183 step=183000 epoch=183 metrics={'time_sample_batch': 0.004959728240966797, 'time_algorithm_update': 0.03580322647094727, 'critic_loss': 19.20015934419632, 'conservative_loss': -2.920362868508164e-06, 'alpha': 6.98261145899437e-08, 'actor_loss': -12.418692958831787, 'temp': 0.2319228438138962, 'temp_loss': -0.0021225967423524708, 'time_step': 0.041060033559799196, 'td_error': 8.295552159618858, 'value_scale': 14.387993559156984, 'discounted_advantage': -12.6891029175462, 'initial_state': 19.26806640625, 'diff_eval': 3829.296371202015} step=183000
2025-12-06 10:00.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_183000.d3


Epoch 184/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.71it/s, critic_loss=19.2, conservative_loss=-2.67e-6, alpha=6.39e-8, actor_loss=-12.5, temp=0.233, temp_loss=-0.000444]


2025-12-06 10:00.59 [info     ] CQL_20251206073907: epoch=184 step=184000 epoch=184 metrics={'time_sample_batch': 0.005027294397354126, 'time_algorithm_update': 0.036154418230056766, 'critic_loss': 19.1779916305542, 'conservative_loss': -2.6725493195272065e-06, 'alpha': 6.383254001463002e-08, 'actor_loss': -12.4989603099823, 'temp': 0.2330931273251772, 'temp_loss': -0.000525991074857302, 'time_step': 0.04146838974952698, 'td_error': 9.001836431470114, 'value_scale': 13.983685889339881, 'discounted_advantage': -12.74329782263505, 'initial_state': 17.62955093383789, 'diff_eval': 4278.197119304646} step=184000
2025-12-06 10:00.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_184000.d3


Epoch 185/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.92it/s, critic_loss=19.4, conservative_loss=-2.39e-6, alpha=5.85e-8, actor_loss=-12.6, temp=0.235, temp_loss=-0.0023] 


2025-12-06 10:01.44 [info     ] CQL_20251206073907: epoch=185 step=185000 epoch=185 metrics={'time_sample_batch': 0.004950668573379517, 'time_algorithm_update': 0.03588070702552795, 'critic_loss': 19.449613025188444, 'conservative_loss': -2.3886172511993207e-06, 'alpha': 5.8432039818256954e-08, 'actor_loss': -12.646469676971435, 'temp': 0.23508198636770247, 'temp_loss': -0.002266649439930916, 'time_step': 0.04111995458602905, 'td_error': 8.188462384360697, 'value_scale': 14.680619634653551, 'discounted_advantage': -12.74024076783119, 'initial_state': 19.641820907592773, 'diff_eval': 3858.2057811244886} step=185000
2025-12-06 10:01.44 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_185000.d3


Epoch 186/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.58it/s, critic_loss=19.4, conservative_loss=-2.16e-6, alpha=5.35e-8, actor_loss=-12.8, temp=0.237, temp_loss=-0.00149]


2025-12-06 10:02.30 [info     ] CQL_20251206073907: epoch=186 step=186000 epoch=186 metrics={'time_sample_batch': 0.005026084899902344, 'time_algorithm_update': 0.036402694225311276, 'critic_loss': 19.452142251968382, 'conservative_loss': -2.1629823974080864e-06, 'alpha': 5.348728313592233e-08, 'actor_loss': -12.84193064880371, 'temp': 0.23655932749807834, 'temp_loss': -0.0015191891802242025, 'time_step': 0.041714918613433835, 'td_error': 9.11055907232463, 'value_scale': 14.676208994861096, 'discounted_advantage': -12.699251829992303, 'initial_state': 17.76718521118164, 'diff_eval': 4112.806411555884} step=186000
2025-12-06 10:02.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_186000.d3


Epoch 187/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.75it/s, critic_loss=19.8, conservative_loss=-1.97e-6, alpha=4.9e-8, actor_loss=-13.1, temp=0.24, temp_loss=-0.000962]


2025-12-06 10:03.16 [info     ] CQL_20251206073907: epoch=187 step=187000 epoch=187 metrics={'time_sample_batch': 0.005020313978195191, 'time_algorithm_update': 0.0361365556716919, 'critic_loss': 19.836072433948516, 'conservative_loss': -1.9652662514317853e-06, 'alpha': 4.8949539632303643e-08, 'actor_loss': -13.05626887512207, 'temp': 0.23964369881153108, 'temp_loss': -0.001058355882181786, 'time_step': 0.041442506790161135, 'td_error': 9.66981737142956, 'value_scale': 14.896393365595156, 'discounted_advantage': -12.716809390896431, 'initial_state': 18.526653289794922, 'diff_eval': 4055.4047615702893} step=187000
2025-12-06 10:03.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_187000.d3


Epoch 188/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.59it/s, critic_loss=20.2, conservative_loss=-1.78e-6, alpha=4.48e-8, actor_loss=-13.4, temp=0.238, temp_loss=0.00174]


2025-12-06 10:04.02 [info     ] CQL_20251206073907: epoch=188 step=188000 epoch=188 metrics={'time_sample_batch': 0.005031074523925781, 'time_algorithm_update': 0.03637664031982422, 'critic_loss': 20.244294447898866, 'conservative_loss': -1.782117272796313e-06, 'alpha': 4.4818997359641343e-08, 'actor_loss': -13.375815810203552, 'temp': 0.2379249610900879, 'temp_loss': 0.0017554482088889926, 'time_step': 0.04170978474617004, 'td_error': 8.230513831430784, 'value_scale': 15.036690494172863, 'discounted_advantage': -12.363445987273192, 'initial_state': 20.749448776245117, 'diff_eval': 3696.1108987170246} step=188000
2025-12-06 10:04.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_188000.d3


Epoch 189/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.85it/s, critic_loss=20.7, conservative_loss=-1.64e-6, alpha=4.1e-8, actor_loss=-13.5, temp=0.239, temp_loss=-0.00204]


2025-12-06 10:04.47 [info     ] CQL_20251206073907: epoch=189 step=189000 epoch=189 metrics={'time_sample_batch': 0.005000560522079468, 'time_algorithm_update': 0.03592951679229736, 'critic_loss': 20.694442623615267, 'conservative_loss': -1.6349240418094269e-06, 'alpha': 4.09997741570578e-08, 'actor_loss': -13.532819625854492, 'temp': 0.23940350091457366, 'temp_loss': -0.0018761980794370174, 'time_step': 0.04122604036331177, 'td_error': 8.827499229061287, 'value_scale': 14.556882542508832, 'discounted_advantage': -12.923199169488571, 'initial_state': 19.55327606201172, 'diff_eval': 4365.740243645704} step=189000
2025-12-06 10:04.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_189000.d3


Epoch 190/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.96it/s, critic_loss=21.2, conservative_loss=-1.49e-6, alpha=3.75e-8, actor_loss=-13.7, temp=0.241, temp_loss=-0.000688]


2025-12-06 10:05.32 [info     ] CQL_20251206073907: epoch=190 step=190000 epoch=190 metrics={'time_sample_batch': 0.004909485340118408, 'time_algorithm_update': 0.03583735728263855, 'critic_loss': 21.220831802845, 'conservative_loss': -1.4903418085623344e-06, 'alpha': 3.751155698950015e-08, 'actor_loss': -13.69190469455719, 'temp': 0.2405755342692137, 'temp_loss': -0.0007626037755981088, 'time_step': 0.04103646612167358, 'td_error': 9.092673401335558, 'value_scale': 14.351170049463489, 'discounted_advantage': -12.888470861709932, 'initial_state': 16.943300247192383, 'diff_eval': 3635.684165603041} step=190000
2025-12-06 10:05.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_190000.d3


Epoch 191/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.28it/s, critic_loss=20.9, conservative_loss=-1.35e-6, alpha=3.43e-8, actor_loss=-13.8, temp=0.24, temp_loss=-0.000729]


2025-12-06 10:06.19 [info     ] CQL_20251206073907: epoch=191 step=191000 epoch=191 metrics={'time_sample_batch': 0.005072171211242676, 'time_algorithm_update': 0.03688778495788574, 'critic_loss': 20.93008455848694, 'conservative_loss': -1.3542149175123085e-06, 'alpha': 3.4330103275692635e-08, 'actor_loss': -13.796124319076538, 'temp': 0.24042713990807532, 'temp_loss': -0.0006555618082638829, 'time_step': 0.0422607581615448, 'td_error': 9.421801028306032, 'value_scale': 14.606153426131378, 'discounted_advantage': -12.630053537690772, 'initial_state': 17.320682525634766, 'diff_eval': 3843.778441653282} step=191000
2025-12-06 10:06.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_191000.d3


Epoch 192/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.13it/s, critic_loss=21.6, conservative_loss=-1.22e-6, alpha=3.14e-8, actor_loss=-13.9, temp=0.242, temp_loss=-0.000606]


2025-12-06 10:07.04 [info     ] CQL_20251206073907: epoch=192 step=192000 epoch=192 metrics={'time_sample_batch': 0.00485764741897583, 'time_algorithm_update': 0.03561652112007141, 'critic_loss': 21.627887508869172, 'conservative_loss': -1.2200277739111699e-06, 'alpha': 3.14297695602761e-08, 'actor_loss': -13.890305467605591, 'temp': 0.24199962720274926, 'temp_loss': -0.0006877172696404159, 'time_step': 0.04076064205169678, 'td_error': 9.169313724168802, 'value_scale': 13.690551111783773, 'discounted_advantage': -11.992276601828351, 'initial_state': 15.617671012878418, 'diff_eval': 3859.1322637018206} step=192000
2025-12-06 10:07.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_192000.d3


Epoch 193/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.30it/s, critic_loss=21.8, conservative_loss=-1.14e-6, alpha=2.88e-8, actor_loss=-13.9, temp=0.243, temp_loss=-0.00125]


2025-12-06 10:07.50 [info     ] CQL_20251206073907: epoch=193 step=193000 epoch=193 metrics={'time_sample_batch': 0.005121470928192139, 'time_algorithm_update': 0.036786107301712036, 'critic_loss': 21.806483110427855, 'conservative_loss': -1.1410656292127896e-06, 'alpha': 2.875649224520771e-08, 'actor_loss': -13.932156432151794, 'temp': 0.243393901348114, 'temp_loss': -0.001248348580673337, 'time_step': 0.042208860874176024, 'td_error': 8.274999109133477, 'value_scale': 14.335074436505016, 'discounted_advantage': -12.063177369512891, 'initial_state': 19.611356735229492, 'diff_eval': 3749.2935043795524} step=193000
2025-12-06 10:07.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_193000.d3


Epoch 194/200: 100%|██████████| 1000/1000 [00:43<00:00, 23.02it/s, critic_loss=21.5, conservative_loss=-1.03e-6, alpha=2.63e-8, actor_loss=-13.9, temp=0.243, temp_loss=0.00195]


2025-12-06 10:08.37 [info     ] CQL_20251206073907: epoch=194 step=194000 epoch=194 metrics={'time_sample_batch': 0.0049725940227508545, 'time_algorithm_update': 0.03750730443000794, 'critic_loss': 21.517459978580476, 'conservative_loss': -1.0279213985313618e-06, 'alpha': 2.6307849891438194e-08, 'actor_loss': -13.897930455207824, 'temp': 0.24262016098201275, 'temp_loss': 0.002004115796647966, 'time_step': 0.04277096939086914, 'td_error': 8.7253193586185, 'value_scale': 14.205759794818205, 'discounted_advantage': -12.011623058839703, 'initial_state': 17.791025161743164, 'diff_eval': 3756.0352811478265} step=194000
2025-12-06 10:08.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_194000.d3


Epoch 195/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.60it/s, critic_loss=21.8, conservative_loss=-9.37e-7, alpha=2.41e-8, actor_loss=-13.8, temp=0.242, temp_loss=-0.00172]


2025-12-06 10:09.23 [info     ] CQL_20251206073907: epoch=195 step=195000 epoch=195 metrics={'time_sample_batch': 0.004973623752593994, 'time_algorithm_update': 0.03644014763832092, 'critic_loss': 21.81013782787323, 'conservative_loss': -9.359924010254872e-07, 'alpha': 2.408712591339679e-08, 'actor_loss': -13.81543698310852, 'temp': 0.2419166266620159, 'temp_loss': -0.0017946345931850374, 'time_step': 0.04170302295684814, 'td_error': 8.353373411991784, 'value_scale': 13.704061965978793, 'discounted_advantage': -11.739397852069597, 'initial_state': 17.491199493408203, 'diff_eval': 4090.0057467135684} step=195000
2025-12-06 10:09.23 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_195000.d3


Epoch 196/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.45it/s, critic_loss=21.8, conservative_loss=-8.69e-7, alpha=2.21e-8, actor_loss=-13.6, temp=0.243, temp_loss=-0.00146]


2025-12-06 10:10.09 [info     ] CQL_20251206073907: epoch=196 step=196000 epoch=196 metrics={'time_sample_batch': 0.005003831148147583, 'time_algorithm_update': 0.03656607151031494, 'critic_loss': 21.786204553127288, 'conservative_loss': -8.685717194225617e-07, 'alpha': 2.2053305768210407e-08, 'actor_loss': -13.549130939483643, 'temp': 0.2431240768134594, 'temp_loss': -0.0015507500005187468, 'time_step': 0.04188618326187134, 'td_error': 8.674019002304522, 'value_scale': 13.37126256155954, 'discounted_advantage': -11.326101939993833, 'initial_state': 16.298059463500977, 'diff_eval': 3717.6517860608315} step=196000
2025-12-06 10:10.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_196000.d3


Epoch 197/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.82it/s, critic_loss=21, conservative_loss=-8.15e-7, alpha=2.02e-8, actor_loss=-13.3, temp=0.248, temp_loss=-0.00294] 


2025-12-06 10:10.55 [info     ] CQL_20251206073907: epoch=197 step=197000 epoch=197 metrics={'time_sample_batch': 0.004949267387390137, 'time_algorithm_update': 0.03599705672264099, 'critic_loss': 21.00063109970093, 'conservative_loss': -8.136925127359973e-07, 'alpha': 2.0157400474118958e-08, 'actor_loss': -13.321267448425292, 'temp': 0.24781261932849885, 'temp_loss': -0.00283260115806479, 'time_step': 0.041260278940200804, 'td_error': 8.348947266695646, 'value_scale': 12.76642522639271, 'discounted_advantage': -11.372880525680719, 'initial_state': 16.04287338256836, 'diff_eval': 4260.8815313299165} step=197000
2025-12-06 10:10.55 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_197000.d3


Epoch 198/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.66it/s, critic_loss=20.5, conservative_loss=-7.38e-7, alpha=1.84e-8, actor_loss=-13.1, temp=0.248, temp_loss=0.00248]


2025-12-06 10:11.40 [info     ] CQL_20251206073907: epoch=198 step=198000 epoch=198 metrics={'time_sample_batch': 0.004940912008285523, 'time_algorithm_update': 0.03628554582595825, 'critic_loss': 20.527997587680815, 'conservative_loss': -7.371167542373769e-07, 'alpha': 1.844030238196126e-08, 'actor_loss': -13.053090512275697, 'temp': 0.24776553522050382, 'temp_loss': 0.0024014641616959126, 'time_step': 0.04153803944587708, 'td_error': 8.254678411770456, 'value_scale': 12.918523600913604, 'discounted_advantage': -10.803623692549023, 'initial_state': 16.488109588623047, 'diff_eval': 3919.73037906095} step=198000
2025-12-06 10:11.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_198000.d3


Epoch 199/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.51it/s, critic_loss=20.7, conservative_loss=-6.7e-7, alpha=1.69e-8, actor_loss=-12.8, temp=0.246, temp_loss=0.000127] 


2025-12-06 10:12.26 [info     ] CQL_20251206073907: epoch=199 step=199000 epoch=199 metrics={'time_sample_batch': 0.005042047262191773, 'time_algorithm_update': 0.03649682331085205, 'critic_loss': 20.69690638399124, 'conservative_loss': -6.694126940374189e-07, 'alpha': 1.6890381967726854e-08, 'actor_loss': -12.825338130950927, 'temp': 0.24573512817919255, 'temp_loss': 0.00010754108615219592, 'time_step': 0.04184866404533386, 'td_error': 7.91430218679577, 'value_scale': 12.435628790294787, 'discounted_advantage': -10.501995950687894, 'initial_state': 15.4049711227417, 'diff_eval': 4115.540833177259} step=199000
2025-12-06 10:12.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_199000.d3


Epoch 200/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.88it/s, critic_loss=20.3, conservative_loss=-6.19e-7, alpha=1.55e-8, actor_loss=-12.5, temp=0.245, temp_loss=-0.000487]


2025-12-06 10:13.12 [info     ] CQL_20251206073907: epoch=200 step=200000 epoch=200 metrics={'time_sample_batch': 0.004981234073638916, 'time_algorithm_update': 0.03590685319900513, 'critic_loss': 20.34026328754425, 'conservative_loss': -6.186358779132206e-07, 'alpha': 1.546816275777019e-08, 'actor_loss': -12.537385222434997, 'temp': 0.2453561713844538, 'temp_loss': -0.0003596519788261503, 'time_step': 0.041179612636566165, 'td_error': 7.262197521431319, 'value_scale': 12.271667444628834, 'discounted_advantage': -10.278454928603878, 'initial_state': 17.67739486694336, 'diff_eval': 4283.980097804282} step=200000
2025-12-06 10:13.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\CQL_20251206073907\model_200000.d3
Training model:  CRR
2025-12-06 10:13.12 [info     ] dataset info                   dataset_info=DatasetInfo(observation_signature=Signature(dtype=[dtype('float64')], shape=[(7,)]), action_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), reward_signature=

Epoch 1/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.51it/s, critic_loss=0.578, actor_loss=-0.25] 


2025-12-06 10:13.33 [info     ] CRR_20251206101312: epoch=1 step=1000 epoch=1 metrics={'time_sample_batch': 0.0020661938190460207, 'time_algorithm_update': 0.013363129138946533, 'critic_loss': 0.58215656388551, 'actor_loss': -0.25155185154115317, 'time_step': 0.015691667079925538, 'td_error': 1.24841395626711, 'value_scale': 3.4825176006801306, 'discounted_advantage': -2.826210526355691, 'initial_state': 3.689387559890747, 'diff_eval': 4508.300502922775} step=1000
2025-12-06 10:13.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_1000.d3


Epoch 2/200: 100%|██████████| 1000/1000 [00:15<00:00, 64.16it/s, critic_loss=2.16, actor_loss=-0.354]


2025-12-06 10:13.53 [info     ] CRR_20251206101312: epoch=2 step=2000 epoch=2 metrics={'time_sample_batch': 0.0019936673641204835, 'time_algorithm_update': 0.01305068016052246, 'critic_loss': 2.1651110251545904, 'actor_loss': -0.354422277265985, 'time_step': 0.015293152570724488, 'td_error': 3.7669852042260867, 'value_scale': 6.919648257089656, 'discounted_advantage': -7.251590105342739, 'initial_state': 5.905816078186035, 'diff_eval': 4946.334256277277} step=2000
2025-12-06 10:13.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_2000.d3


Epoch 3/200: 100%|██████████| 1000/1000 [00:15<00:00, 64.26it/s, critic_loss=6.39, actor_loss=-0.374]


2025-12-06 10:14.13 [info     ] CRR_20251206101312: epoch=3 step=3000 epoch=3 metrics={'time_sample_batch': 0.001971231460571289, 'time_algorithm_update': 0.01304760766029358, 'critic_loss': 6.416279786109924, 'actor_loss': -0.37150860435748473, 'time_step': 0.015267077445983887, 'td_error': 14.055573748332861, 'value_scale': 14.150146097841002, 'discounted_advantage': -21.303081827284565, 'initial_state': 11.510120391845703, 'diff_eval': 6505.258520936554} step=3000
2025-12-06 10:14.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_3000.d3


Epoch 4/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.54it/s, critic_loss=18.3, actor_loss=-0.417]


2025-12-06 10:14.33 [info     ] CRR_20251206101312: epoch=4 step=4000 epoch=4 metrics={'time_sample_batch': 0.0019982306957244873, 'time_algorithm_update': 0.013192524433135986, 'critic_loss': 18.361623512744902, 'actor_loss': -0.41632610174559886, 'time_step': 0.015433506488800049, 'td_error': 47.02635855712941, 'value_scale': 26.477830589660705, 'discounted_advantage': -45.151781849055276, 'initial_state': 19.931684494018555, 'diff_eval': 9802.582152373372} step=4000
2025-12-06 10:14.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_4000.d3


Epoch 5/200: 100%|██████████| 1000/1000 [00:15<00:00, 65.38it/s, critic_loss=49.7, actor_loss=-0.623]


2025-12-06 10:14.53 [info     ] CRR_20251206101312: epoch=5 step=5000 epoch=5 metrics={'time_sample_batch': 0.001934086561203003, 'time_algorithm_update': 0.012864365100860595, 'critic_loss': 49.8896265039444, 'actor_loss': -0.632790956660021, 'time_step': 0.015026337862014771, 'td_error': 141.0208942636178, 'value_scale': 50.45103758268764, 'discounted_advantage': -67.69948734551193, 'initial_state': 30.98149871826172, 'diff_eval': 4234.343688449357} step=5000
2025-12-06 10:14.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_5000.d3


Epoch 6/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.93it/s, critic_loss=140, actor_loss=-0.914]


2025-12-06 10:15.14 [info     ] CRR_20251206101312: epoch=6 step=6000 epoch=6 metrics={'time_sample_batch': 0.0020345723628997802, 'time_algorithm_update': 0.013583247184753417, 'critic_loss': 140.1318703918457, 'actor_loss': -0.9172880642930977, 'time_step': 0.015865938186645508, 'td_error': 321.5231748947578, 'value_scale': 80.83709462670248, 'discounted_advantage': -111.17735200119579, 'initial_state': 47.79099655151367, 'diff_eval': 3763.139170085056} step=6000
2025-12-06 10:15.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_6000.d3


Epoch 7/200: 100%|██████████| 1000/1000 [00:15<00:00, 64.42it/s, critic_loss=292, actor_loss=-1.15]


2025-12-06 10:15.34 [info     ] CRR_20251206101312: epoch=7 step=7000 epoch=7 metrics={'time_sample_batch': 0.0019425346851348877, 'time_algorithm_update': 0.013065659761428833, 'critic_loss': 292.00925340271, 'actor_loss': -1.156842976467684, 'time_step': 0.015249454736709595, 'td_error': 607.8792213251048, 'value_scale': 121.6046456275422, 'discounted_advantage': -178.7303742434945, 'initial_state': 77.5825424194336, 'diff_eval': 3098.1905847230937} step=7000
2025-12-06 10:15.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_7000.d3


Epoch 8/200: 100%|██████████| 1000/1000 [00:15<00:00, 65.10it/s, critic_loss=527, actor_loss=-1.23]


2025-12-06 10:15.54 [info     ] CRR_20251206101312: epoch=8 step=8000 epoch=8 metrics={'time_sample_batch': 0.0019155979156494141, 'time_algorithm_update': 0.012916772365570069, 'critic_loss': 528.1289258956909, 'actor_loss': -1.2320990171029698, 'time_step': 0.015074371099472047, 'td_error': 1259.5628589442128, 'value_scale': 181.20663284276176, 'discounted_advantage': -225.34083295256553, 'initial_state': 116.63611602783203, 'diff_eval': 3508.335797015046} step=8000
2025-12-06 10:15.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_8000.d3


Epoch 9/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.67it/s, critic_loss=1.19e+3, actor_loss=-1.43]


2025-12-06 10:16.14 [info     ] CRR_20251206101312: epoch=9 step=9000 epoch=9 metrics={'time_sample_batch': 0.002007429361343384, 'time_algorithm_update': 0.013444971323013305, 'critic_loss': 1194.591568206787, 'actor_loss': -1.4309341657885817, 'time_step': 0.01568724536895752, 'td_error': 2893.597597361637, 'value_scale': 275.7743476490363, 'discounted_advantage': -407.23679647499324, 'initial_state': 184.15859985351562, 'diff_eval': 4641.844344914872} step=9000
2025-12-06 10:16.15 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_9000.d3


Epoch 10/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.78it/s, critic_loss=2.5e+3, actor_loss=-1.44]


2025-12-06 10:16.35 [info     ] CRR_20251206101312: epoch=10 step=10000 epoch=10 metrics={'time_sample_batch': 0.002039166212081909, 'time_algorithm_update': 0.013341008424758912, 'critic_loss': 2512.6400723876955, 'actor_loss': -1.4418671674717916, 'time_step': 0.015622605085372925, 'td_error': 6297.118881061277, 'value_scale': 411.4371414120619, 'discounted_advantage': -529.6802774052338, 'initial_state': 273.9862365722656, 'diff_eval': 4068.2814557984384} step=10000
2025-12-06 10:16.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_10000.d3


Epoch 11/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.50it/s, critic_loss=5.04e+3, actor_loss=-1.36]


2025-12-06 10:16.55 [info     ] CRR_20251206101312: epoch=11 step=11000 epoch=11 metrics={'time_sample_batch': 0.0020154435634613037, 'time_algorithm_update': 0.013160266160964966, 'critic_loss': 5057.6738209228515, 'actor_loss': -1.3570570593105513, 'time_step': 0.01543795919418335, 'td_error': 12808.132094377013, 'value_scale': 611.550990908064, 'discounted_advantage': -779.013405603159, 'initial_state': 441.5497131347656, 'diff_eval': 4636.957055974926} step=11000
2025-12-06 10:16.55 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_11000.d3


Epoch 12/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.75it/s, critic_loss=1.01e+4, actor_loss=-1.48]


2025-12-06 10:17.16 [info     ] CRR_20251206101312: epoch=12 step=12000 epoch=12 metrics={'time_sample_batch': 0.002082929611206055, 'time_algorithm_update': 0.01379027509689331, 'critic_loss': 10104.461794433593, 'actor_loss': -1.4821161619834602, 'time_step': 0.016135424852371214, 'td_error': 23195.67683049357, 'value_scale': 830.8447086049645, 'discounted_advantage': -1094.4067550534785, 'initial_state': 616.0958862304688, 'diff_eval': 4516.366935663491} step=12000
2025-12-06 10:17.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_12000.d3


Epoch 13/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.93it/s, critic_loss=1.77e+4, actor_loss=-1.41]


2025-12-06 10:17.37 [info     ] CRR_20251206101312: epoch=13 step=13000 epoch=13 metrics={'time_sample_batch': 0.0020590598583221436, 'time_algorithm_update': 0.013273965358734131, 'critic_loss': 17730.85068310547, 'actor_loss': -1.4146932100238045, 'time_step': 0.015588598728179932, 'td_error': 40027.42420929034, 'value_scale': 1125.067075206568, 'discounted_advantage': -1319.3225127076612, 'initial_state': 871.030029296875, 'diff_eval': 4170.303421546118} step=13000
2025-12-06 10:17.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_13000.d3


Epoch 14/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.84it/s, critic_loss=2.86e+4, actor_loss=-1.57]


2025-12-06 10:17.57 [info     ] CRR_20251206101312: epoch=14 step=14000 epoch=14 metrics={'time_sample_batch': 0.002009303092956543, 'time_algorithm_update': 0.01332864546775818, 'critic_loss': 28632.196571289063, 'actor_loss': -1.5716208184631542, 'time_step': 0.015606139183044434, 'td_error': 69707.55231504119, 'value_scale': 1510.3172182273386, 'discounted_advantage': -1639.197131627107, 'initial_state': 1210.5894775390625, 'diff_eval': 3929.4699600692306} step=14000
2025-12-06 10:17.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_14000.d3


Epoch 15/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.70it/s, critic_loss=4.7e+4, actor_loss=-1.48]


2025-12-06 10:18.18 [info     ] CRR_20251206101312: epoch=15 step=15000 epoch=15 metrics={'time_sample_batch': 0.002007545471191406, 'time_algorithm_update': 0.013409281969070435, 'critic_loss': 47075.17618701172, 'actor_loss': -1.491374602900818, 'time_step': 0.015665011644363402, 'td_error': 104097.97375388713, 'value_scale': 1898.9948325784687, 'discounted_advantage': -2179.3470003859156, 'initial_state': 1570.2142333984375, 'diff_eval': 4597.795269432797} step=15000
2025-12-06 10:18.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_15000.d3


Epoch 16/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.65it/s, critic_loss=7.17e+4, actor_loss=-1.56]


2025-12-06 10:18.38 [info     ] CRR_20251206101312: epoch=16 step=16000 epoch=16 metrics={'time_sample_batch': 0.002079850673675537, 'time_algorithm_update': 0.013352460145950318, 'critic_loss': 71901.6733203125, 'actor_loss': -1.5518138510398567, 'time_step': 0.015671183586120605, 'td_error': 157803.53620304886, 'value_scale': 2373.5302132720817, 'discounted_advantage': -2689.3189176785395, 'initial_state': 2007.329345703125, 'diff_eval': 4690.851470649301} step=16000
2025-12-06 10:18.38 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_16000.d3


Epoch 17/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.37it/s, critic_loss=1.03e+5, actor_loss=-1.63]


2025-12-06 10:18.59 [info     ] CRR_20251206101312: epoch=17 step=17000 epoch=17 metrics={'time_sample_batch': 0.0021326699256896974, 'time_algorithm_update': 0.01362138843536377, 'critic_loss': 103614.85858203124, 'actor_loss': -1.6345804215292445, 'time_step': 0.016003132581710815, 'td_error': 223963.46515047012, 'value_scale': 2902.814434604693, 'discounted_advantage': -3139.9086928787865, 'initial_state': 2527.81396484375, 'diff_eval': 4622.786501678609} step=17000
2025-12-06 10:18.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_17000.d3


Epoch 18/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.68it/s, critic_loss=1.42e+5, actor_loss=-1.67]


2025-12-06 10:19.20 [info     ] CRR_20251206101312: epoch=18 step=18000 epoch=18 metrics={'time_sample_batch': 0.0020241639614105226, 'time_algorithm_update': 0.01338930082321167, 'critic_loss': 141754.40883203124, 'actor_loss': -1.6684487299558242, 'time_step': 0.01566541028022766, 'td_error': 281500.4134356903, 'value_scale': 3337.5551739394514, 'discounted_advantage': -3681.9774921814783, 'initial_state': 2941.202880859375, 'diff_eval': 4278.406380568244} step=18000
2025-12-06 10:19.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_18000.d3


Epoch 19/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.93it/s, critic_loss=1.82e+5, actor_loss=-1.83]


2025-12-06 10:19.40 [info     ] CRR_20251206101312: epoch=19 step=19000 epoch=19 metrics={'time_sample_batch': 0.0019992029666900633, 'time_algorithm_update': 0.013122164964675904, 'critic_loss': 182618.42168359374, 'actor_loss': -1.8338056774344296, 'time_step': 0.015352136135101318, 'td_error': 351846.67455777596, 'value_scale': 3820.7219569804984, 'discounted_advantage': -4109.088523299015, 'initial_state': 3404.453857421875, 'diff_eval': 4299.9534147412005} step=19000
2025-12-06 10:19.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_19000.d3


Epoch 20/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.53it/s, critic_loss=2.32e+5, actor_loss=-1.88]


2025-12-06 10:20.00 [info     ] CRR_20251206101312: epoch=20 step=20000 epoch=20 metrics={'time_sample_batch': 0.0020133185386657713, 'time_algorithm_update': 0.013190188646316528, 'critic_loss': 231712.5670546875, 'actor_loss': -1.8617774219065906, 'time_step': 0.015447023391723633, 'td_error': 444244.0117837077, 'value_scale': 4369.452974279488, 'discounted_advantage': -4699.213960386066, 'initial_state': 3922.616455078125, 'diff_eval': 4611.477284802922} step=20000
2025-12-06 10:20.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_20000.d3


Epoch 21/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.40it/s, critic_loss=2.92e+5, actor_loss=-1.29] 


2025-12-06 10:20.21 [info     ] CRR_20251206101312: epoch=21 step=21000 epoch=21 metrics={'time_sample_batch': 0.0020242836475372313, 'time_algorithm_update': 0.013192530632019042, 'critic_loss': 292156.9182109375, 'actor_loss': -1.2998958494695836, 'time_step': 0.015464248418807983, 'td_error': 547565.4915867443, 'value_scale': 4904.186154770831, 'discounted_advantage': -4903.270316496992, 'initial_state': 4427.814453125, 'diff_eval': 4178.408522923473} step=21000
2025-12-06 10:20.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_21000.d3


Epoch 22/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.55it/s, critic_loss=3.53e+5, actor_loss=-2.13]


2025-12-06 10:20.41 [info     ] CRR_20251206101312: epoch=22 step=22000 epoch=22 metrics={'time_sample_batch': 0.0020207011699676512, 'time_algorithm_update': 0.01317063307762146, 'critic_loss': 353770.162078125, 'actor_loss': -2.1321230270802043, 'time_step': 0.015439544439315795, 'td_error': 648616.6072556942, 'value_scale': 5426.409207826088, 'discounted_advantage': -5603.546906197754, 'initial_state': 4885.169921875, 'diff_eval': 4009.3487628729667} step=22000
2025-12-06 10:20.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_22000.d3


Epoch 23/200: 100%|██████████| 1000/1000 [00:15<00:00, 64.79it/s, critic_loss=4.1e+5, actor_loss=-2.18]


2025-12-06 10:21.01 [info     ] CRR_20251206101312: epoch=23 step=23000 epoch=23 metrics={'time_sample_batch': 0.0019229319095611572, 'time_algorithm_update': 0.012964672565460205, 'critic_loss': 411238.293125, 'actor_loss': -2.1826535103125497, 'time_step': 0.015138245344161987, 'td_error': 782601.4488490217, 'value_scale': 6072.836057216903, 'discounted_advantage': -6526.50908901457, 'initial_state': 5529.95703125, 'diff_eval': 4356.31459161454} step=23000
2025-12-06 10:21.01 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_23000.d3


Epoch 24/200: 100%|██████████| 1000/1000 [00:15<00:00, 66.31it/s, critic_loss=5.08e+5, actor_loss=-2.43]


2025-12-06 10:21.20 [info     ] CRR_20251206101312: epoch=24 step=24000 epoch=24 metrics={'time_sample_batch': 0.001883692502975464, 'time_algorithm_update': 0.012684630632400513, 'critic_loss': 508520.9677890625, 'actor_loss': -2.421680848459015, 'time_step': 0.014804481506347656, 'td_error': 955430.1515524299, 'value_scale': 6728.214712368373, 'discounted_advantage': -6958.366115755325, 'initial_state': 6115.15673828125, 'diff_eval': 3925.274861289702} step=24000
2025-12-06 10:21.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_24000.d3


Epoch 25/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.57it/s, critic_loss=6.04e+5, actor_loss=-2.51]


2025-12-06 10:21.41 [info     ] CRR_20251206101312: epoch=25 step=25000 epoch=25 metrics={'time_sample_batch': 0.0020177454948425294, 'time_algorithm_update': 0.013187774658203125, 'critic_loss': 604211.1253125, 'actor_loss': -2.5076392399910836, 'time_step': 0.015447945356369019, 'td_error': 1142961.2413501986, 'value_scale': 7467.042249527681, 'discounted_advantage': -7859.593116024627, 'initial_state': 6841.1611328125, 'diff_eval': 3992.5921929119854} step=25000
2025-12-06 10:21.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_25000.d3


Epoch 26/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.83it/s, critic_loss=7.32e+5, actor_loss=-2.68]


2025-12-06 10:22.02 [info     ] CRR_20251206101312: epoch=26 step=26000 epoch=26 metrics={'time_sample_batch': 0.0021955680847167967, 'time_algorithm_update': 0.013669783353805542, 'critic_loss': 732568.5126875, 'actor_loss': -2.6778537133790086, 'time_step': 0.01613245701789856, 'td_error': 1370728.3704089487, 'value_scale': 8212.434481701526, 'discounted_advantage': -9126.745817210982, 'initial_state': 7520.50146484375, 'diff_eval': 4266.238901129996} step=26000
2025-12-06 10:22.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_26000.d3


Epoch 27/200: 100%|██████████| 1000/1000 [00:15<00:00, 64.96it/s, critic_loss=8.85e+5, actor_loss=-2.75]


2025-12-06 10:22.22 [info     ] CRR_20251206101312: epoch=27 step=27000 epoch=27 metrics={'time_sample_batch': 0.0019386866092681884, 'time_algorithm_update': 0.012933217048645019, 'critic_loss': 885755.266890625, 'actor_loss': -2.7596637953044847, 'time_step': 0.015105727195739747, 'td_error': 1674344.8003829175, 'value_scale': 9119.721690738292, 'discounted_advantage': -9865.932809324531, 'initial_state': 8383.1513671875, 'diff_eval': 4054.0091080528437} step=27000
2025-12-06 10:22.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_27000.d3


Epoch 28/200: 100%|██████████| 1000/1000 [00:15<00:00, 64.57it/s, critic_loss=1.06e+6, actor_loss=-2.89]


2025-12-06 10:22.42 [info     ] CRR_20251206101312: epoch=28 step=28000 epoch=28 metrics={'time_sample_batch': 0.001965972900390625, 'time_algorithm_update': 0.012986533403396606, 'critic_loss': 1056522.43053125, 'actor_loss': -2.9020086972657593, 'time_step': 0.015200672864913941, 'td_error': 1962977.0135847498, 'value_scale': 9852.962106632767, 'discounted_advantage': -10653.319525966834, 'initial_state': 9066.0908203125, 'diff_eval': 3930.593111038661} step=28000
2025-12-06 10:22.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_28000.d3


Epoch 29/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.87it/s, critic_loss=1.17e+6, actor_loss=-2.99]


2025-12-06 10:23.02 [info     ] CRR_20251206101312: epoch=29 step=29000 epoch=29 metrics={'time_sample_batch': 0.001995938301086426, 'time_algorithm_update': 0.013136219263076782, 'critic_loss': 1172916.355484375, 'actor_loss': -2.9892018264973013, 'time_step': 0.015364990234375, 'td_error': 2224715.3196661905, 'value_scale': 10582.531915298552, 'discounted_advantage': -11621.235123128658, 'initial_state': 9720.685546875, 'diff_eval': 3804.908649518429} step=29000
2025-12-06 10:23.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_29000.d3


Epoch 30/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.70it/s, critic_loss=1.33e+6, actor_loss=-3.22]


2025-12-06 10:23.22 [info     ] CRR_20251206101312: epoch=30 step=30000 epoch=30 metrics={'time_sample_batch': 0.0020390114784240724, 'time_algorithm_update': 0.013119966983795165, 'critic_loss': 1326647.67134375, 'actor_loss': -3.2276832912275566, 'time_step': 0.015407759189605713, 'td_error': 2615563.6206001006, 'value_scale': 11483.245485342559, 'discounted_advantage': -11800.532589314076, 'initial_state': 10604.52734375, 'diff_eval': 3843.5212913036044} step=30000
2025-12-06 10:23.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_30000.d3


Epoch 31/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.52it/s, critic_loss=1.57e+6, actor_loss=-3.22]


2025-12-06 10:23.43 [info     ] CRR_20251206101312: epoch=31 step=31000 epoch=31 metrics={'time_sample_batch': 0.002101522445678711, 'time_algorithm_update': 0.013604312896728516, 'critic_loss': 1571250.17871875, 'actor_loss': -3.2212614339217542, 'time_step': 0.01595640563964844, 'td_error': 3012132.950138306, 'value_scale': 12308.508708841942, 'discounted_advantage': -14007.506255223436, 'initial_state': 11463.03515625, 'diff_eval': 3850.9864409855313} step=31000
2025-12-06 10:23.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_31000.d3


Epoch 32/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.61it/s, critic_loss=1.79e+6, actor_loss=-3.35]


2025-12-06 10:24.03 [info     ] CRR_20251206101312: epoch=32 step=32000 epoch=32 metrics={'time_sample_batch': 0.001995757818222046, 'time_algorithm_update': 0.01316049599647522, 'critic_loss': 1790752.03890625, 'actor_loss': -3.349714296042919, 'time_step': 0.015413105726242065, 'td_error': 3393504.7662734264, 'value_scale': 13096.991027985514, 'discounted_advantage': -13375.65956950666, 'initial_state': 12110.60546875, 'diff_eval': 3558.0751693430034} step=32000
2025-12-06 10:24.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_32000.d3


Epoch 33/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.02it/s, critic_loss=1.98e+6, actor_loss=-3.4]


2025-12-06 10:24.24 [info     ] CRR_20251206101312: epoch=33 step=33000 epoch=33 metrics={'time_sample_batch': 0.002101343631744385, 'time_algorithm_update': 0.013228256225585938, 'critic_loss': 1979977.681828125, 'actor_loss': -3.412416359759867, 'time_step': 0.015574025392532349, 'td_error': 3875960.7433820604, 'value_scale': 14001.940500187455, 'discounted_advantage': -14834.30042901656, 'initial_state': 12991.384765625, 'diff_eval': 3882.9640735768967} step=33000
2025-12-06 10:24.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_33000.d3


Epoch 34/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.76it/s, critic_loss=2.17e+6, actor_loss=-3.48]


2025-12-06 10:24.45 [info     ] CRR_20251206101312: epoch=34 step=34000 epoch=34 metrics={'time_sample_batch': 0.0019755613803863524, 'time_algorithm_update': 0.013695591688156128, 'critic_loss': 2175097.863, 'actor_loss': -3.4834512119647116, 'time_step': 0.015905019760131835, 'td_error': 4453814.672827847, 'value_scale': 14953.252034573423, 'discounted_advantage': -15677.228493303985, 'initial_state': 13917.0849609375, 'diff_eval': 3737.6830817765376} step=34000
2025-12-06 10:24.45 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_34000.d3


Epoch 35/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.57it/s, critic_loss=2.46e+6, actor_loss=-3.52]


2025-12-06 10:25.06 [info     ] CRR_20251206101312: epoch=35 step=35000 epoch=35 metrics={'time_sample_batch': 0.0019819853305816653, 'time_algorithm_update': 0.0134635272026062, 'critic_loss': 2461943.638625, 'actor_loss': -3.5218819494596683, 'time_step': 0.015692598581314086, 'td_error': 5041976.010908914, 'value_scale': 15837.642230332047, 'discounted_advantage': -17317.179878457777, 'initial_state': 14751.5625, 'diff_eval': 3925.717460540536} step=35000
2025-12-06 10:25.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_35000.d3


Epoch 36/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.88it/s, critic_loss=2.78e+6, actor_loss=-3.62]


2025-12-06 10:25.26 [info     ] CRR_20251206101312: epoch=36 step=36000 epoch=36 metrics={'time_sample_batch': 0.002006986379623413, 'time_algorithm_update': 0.013134125232696534, 'critic_loss': 2777166.22121875, 'actor_loss': -3.6182158298529683, 'time_step': 0.015372859001159669, 'td_error': 5834728.220237188, 'value_scale': 17007.74706795909, 'discounted_advantage': -17182.857911806914, 'initial_state': 15951.0439453125, 'diff_eval': 3855.886369366137} step=36000
2025-12-06 10:25.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_36000.d3


Epoch 37/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.57it/s, critic_loss=3.09e+6, actor_loss=-3.7]


2025-12-06 10:25.47 [info     ] CRR_20251206101312: epoch=37 step=37000 epoch=37 metrics={'time_sample_batch': 0.0021670899391174315, 'time_algorithm_update': 0.013761024951934814, 'critic_loss': 3087800.6885, 'actor_loss': -3.696197726096958, 'time_step': 0.016192420959472657, 'td_error': 6253990.018544598, 'value_scale': 17660.970560334456, 'discounted_advantage': -17490.922385126112, 'initial_state': 16616.126953125, 'diff_eval': 3812.57995506001} step=37000
2025-12-06 10:25.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_37000.d3


Epoch 38/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.36it/s, critic_loss=3.27e+6, actor_loss=-3.81]


2025-12-06 10:26.08 [info     ] CRR_20251206101312: epoch=38 step=38000 epoch=38 metrics={'time_sample_batch': 0.0020314884185791016, 'time_algorithm_update': 0.013230883836746216, 'critic_loss': 3287502.59840625, 'actor_loss': -3.8009082970172168, 'time_step': 0.015503895282745362, 'td_error': 6774397.420942521, 'value_scale': 18332.45992901299, 'discounted_advantage': -19099.57804510474, 'initial_state': 17269.34375, 'diff_eval': 3604.0729358051312} step=38000
2025-12-06 10:26.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_38000.d3


Epoch 39/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.09it/s, critic_loss=3.54e+6, actor_loss=-3.97]


2025-12-06 10:26.28 [info     ] CRR_20251206101312: epoch=39 step=39000 epoch=39 metrics={'time_sample_batch': 0.0020295164585113525, 'time_algorithm_update': 0.01327616310119629, 'critic_loss': 3539241.19484375, 'actor_loss': -3.965505792275071, 'time_step': 0.015554339647293091, 'td_error': 7242080.81016145, 'value_scale': 18972.874166922018, 'discounted_advantage': -20363.820649854184, 'initial_state': 17844.697265625, 'diff_eval': 3853.098480739527} step=39000
2025-12-06 10:26.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_39000.d3


Epoch 40/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.59it/s, critic_loss=3.86e+6, actor_loss=-3.91]


2025-12-06 10:26.49 [info     ] CRR_20251206101312: epoch=40 step=40000 epoch=40 metrics={'time_sample_batch': 0.0020046555995941163, 'time_algorithm_update': 0.013173379182815552, 'critic_loss': 3866934.698875, 'actor_loss': -3.912386405132711, 'time_step': 0.015432119369506836, 'td_error': 7956027.150594724, 'value_scale': 19988.43155223466, 'discounted_advantage': -21989.294872304985, 'initial_state': 18946.10546875, 'diff_eval': 3858.765056018812} step=40000
2025-12-06 10:26.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_40000.d3


Epoch 41/200: 100%|██████████| 1000/1000 [00:16<00:00, 58.94it/s, critic_loss=4.2e+6, actor_loss=-4.1] 


2025-12-06 10:27.10 [info     ] CRR_20251206101312: epoch=41 step=41000 epoch=41 metrics={'time_sample_batch': 0.0021019296646118163, 'time_algorithm_update': 0.014233738899230956, 'critic_loss': 4197760.98059375, 'actor_loss': -4.094573415059596, 'time_step': 0.016624486207962037, 'td_error': 9023925.100304127, 'value_scale': 21231.16042912396, 'discounted_advantage': -22329.8422182757, 'initial_state': 20146.005859375, 'diff_eval': 4131.789129405702} step=41000
2025-12-06 10:27.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_41000.d3


Epoch 42/200: 100%|██████████| 1000/1000 [00:16<00:00, 62.27it/s, critic_loss=4.69e+6, actor_loss=-4.15]


2025-12-06 10:27.31 [info     ] CRR_20251206101312: epoch=42 step=42000 epoch=42 metrics={'time_sample_batch': 0.0020062408447265624, 'time_algorithm_update': 0.013413970947265624, 'critic_loss': 4699075.2538125, 'actor_loss': -4.145159204225085, 'time_step': 0.015703391790390014, 'td_error': 9642160.68067119, 'value_scale': 22023.39699430058, 'discounted_advantage': -23398.75145136632, 'initial_state': 20893.126953125, 'diff_eval': 3784.826609975047} step=42000
2025-12-06 10:27.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_42000.d3


Epoch 43/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.57it/s, critic_loss=5.03e+6, actor_loss=-4.2]


2025-12-06 10:27.51 [info     ] CRR_20251206101312: epoch=43 step=43000 epoch=43 metrics={'time_sample_batch': 0.0020532219409942626, 'time_algorithm_update': 0.013319586277008057, 'critic_loss': 5046929.3328125, 'actor_loss': -4.212242958322167, 'time_step': 0.01565030860900879, 'td_error': 10730972.412652208, 'value_scale': 23039.263567743037, 'discounted_advantage': -24367.62139883431, 'initial_state': 21811.91015625, 'diff_eval': 4326.554933811851} step=43000
2025-12-06 10:27.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_43000.d3


Epoch 44/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.86it/s, critic_loss=5.39e+6, actor_loss=-4.2]


2025-12-06 10:28.12 [info     ] CRR_20251206101312: epoch=44 step=44000 epoch=44 metrics={'time_sample_batch': 0.0020003666877746584, 'time_algorithm_update': 0.013122092723846436, 'critic_loss': 5393380.0789375, 'actor_loss': -4.203552685573697, 'time_step': 0.015366610050201415, 'td_error': 11437652.724959921, 'value_scale': 23914.700884129157, 'discounted_advantage': -24422.908800721816, 'initial_state': 22694.2734375, 'diff_eval': 4137.433135537406} step=44000
2025-12-06 10:28.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_44000.d3


Epoch 45/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.10it/s, critic_loss=5.86e+6, actor_loss=-4.27]


2025-12-06 10:28.32 [info     ] CRR_20251206101312: epoch=45 step=45000 epoch=45 metrics={'time_sample_batch': 0.0020281035900115967, 'time_algorithm_update': 0.013295343637466431, 'critic_loss': 5878800.0464375, 'actor_loss': -4.268003123399569, 'time_step': 0.015553326606750488, 'td_error': 12912259.64961187, 'value_scale': 25460.68135734136, 'discounted_advantage': -26798.282263647925, 'initial_state': 24302.884765625, 'diff_eval': 3937.5330363951775} step=45000
2025-12-06 10:28.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_45000.d3


Epoch 46/200: 100%|██████████| 1000/1000 [00:15<00:00, 65.24it/s, critic_loss=6.55e+6, actor_loss=-4.12]


2025-12-06 10:28.52 [info     ] CRR_20251206101312: epoch=46 step=46000 epoch=46 metrics={'time_sample_batch': 0.001927065372467041, 'time_algorithm_update': 0.012829866886138916, 'critic_loss': 6541546.5470625, 'actor_loss': -4.113543168450706, 'time_step': 0.015016685724258422, 'td_error': 14248836.507378057, 'value_scale': 26629.432468507035, 'discounted_advantage': -27057.578626885985, 'initial_state': 25355.427734375, 'diff_eval': 4338.193485394403} step=46000
2025-12-06 10:28.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_46000.d3


Epoch 47/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.86it/s, critic_loss=7.24e+6, actor_loss=-4.07]


2025-12-06 10:29.12 [info     ] CRR_20251206101312: epoch=47 step=47000 epoch=47 metrics={'time_sample_batch': 0.0020567147731781004, 'time_algorithm_update': 0.013235152959823608, 'critic_loss': 7229599.4520625, 'actor_loss': -4.06900055591762, 'time_step': 0.015579487323760986, 'td_error': 15551205.357874678, 'value_scale': 27954.526406645477, 'discounted_advantage': -28405.35244049273, 'initial_state': 26832.2265625, 'diff_eval': 4247.929924566338} step=47000
2025-12-06 10:29.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_47000.d3


Epoch 48/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.71it/s, critic_loss=7.82e+6, actor_loss=-4.01]


2025-12-06 10:29.33 [info     ] CRR_20251206101312: epoch=48 step=48000 epoch=48 metrics={'time_sample_batch': 0.0020154993534088135, 'time_algorithm_update': 0.01336371612548828, 'critic_loss': 7806114.97475, 'actor_loss': -4.010514855921269, 'time_step': 0.015635939598083497, 'td_error': 17269807.361469235, 'value_scale': 29292.71855654081, 'discounted_advantage': -28406.887522476343, 'initial_state': 28100.86328125, 'diff_eval': 3933.7693646561606} step=48000
2025-12-06 10:29.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_48000.d3


Epoch 49/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.54it/s, critic_loss=8.61e+6, actor_loss=-3.77]


2025-12-06 10:29.54 [info     ] CRR_20251206101312: epoch=49 step=49000 epoch=49 metrics={'time_sample_batch': 0.002063145637512207, 'time_algorithm_update': 0.01340653157234192, 'critic_loss': 8605024.479375, 'actor_loss': -3.778418261713348, 'time_step': 0.01570255970954895, 'td_error': 18491383.744565792, 'value_scale': 30520.17576777865, 'discounted_advantage': -31210.750936502533, 'initial_state': 29514.560546875, 'diff_eval': 4435.458494493073} step=49000
2025-12-06 10:29.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_49000.d3


Epoch 50/200: 100%|██████████| 1000/1000 [00:16<00:00, 62.04it/s, critic_loss=9.61e+6, actor_loss=-3.78]


2025-12-06 10:30.14 [info     ] CRR_20251206101312: epoch=50 step=50000 epoch=50 metrics={'time_sample_batch': 0.002040353059768677, 'time_algorithm_update': 0.013510427713394165, 'critic_loss': 9598695.393875, 'actor_loss': -3.790070056030527, 'time_step': 0.015810222625732423, 'td_error': 19914424.37704589, 'value_scale': 31400.85816638048, 'discounted_advantage': -31927.412331186086, 'initial_state': 30151.21484375, 'diff_eval': 4366.409045975941} step=50000
2025-12-06 10:30.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_50000.d3


Epoch 51/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.93it/s, critic_loss=1.02e+7, actor_loss=-3.28]


2025-12-06 10:30.35 [info     ] CRR_20251206101312: epoch=51 step=51000 epoch=51 metrics={'time_sample_batch': 0.0019473483562469482, 'time_algorithm_update': 0.013130404233932495, 'critic_loss': 10200394.424375, 'actor_loss': -3.2806232781400904, 'time_step': 0.015338446140289307, 'td_error': 21803642.97280527, 'value_scale': 32846.1696624548, 'discounted_advantage': -31982.103315818396, 'initial_state': 31646.66796875, 'diff_eval': 4845.142904714081} step=51000
2025-12-06 10:30.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_51000.d3


Epoch 52/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.21it/s, critic_loss=1.09e+7, actor_loss=-3.55]


2025-12-06 10:30.55 [info     ] CRR_20251206101312: epoch=52 step=52000 epoch=52 metrics={'time_sample_batch': 0.0020040919780731203, 'time_algorithm_update': 0.013276794910430907, 'critic_loss': 10869989.292, 'actor_loss': -3.5543058192431927, 'time_step': 0.015527782440185546, 'td_error': 23609838.05807632, 'value_scale': 34466.48556008862, 'discounted_advantage': -36515.21311889304, 'initial_state': 33508.00390625, 'diff_eval': 5138.52065336325} step=52000
2025-12-06 10:30.55 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_52000.d3


Epoch 53/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.66it/s, critic_loss=1.23e+7, actor_loss=-3.14]


2025-12-06 10:31.15 [info     ] CRR_20251206101312: epoch=53 step=53000 epoch=53 metrics={'time_sample_batch': 0.0019944045543670652, 'time_algorithm_update': 0.013156749486923218, 'critic_loss': 12281533.71575, 'actor_loss': -3.1439606794044375, 'time_step': 0.015407114267349243, 'td_error': 25145790.675357014, 'value_scale': 35477.820128705454, 'discounted_advantage': -36655.30751378515, 'initial_state': 34634.74609375, 'diff_eval': 5320.549384208906} step=53000
2025-12-06 10:31.15 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_53000.d3


Epoch 54/200: 100%|██████████| 1000/1000 [00:16<00:00, 62.13it/s, critic_loss=1.51e+7, actor_loss=-1.97]


2025-12-06 10:31.36 [info     ] CRR_20251206101312: epoch=54 step=54000 epoch=54 metrics={'time_sample_batch': 0.002054759979248047, 'time_algorithm_update': 0.013502090930938722, 'critic_loss': 15043435.59675, 'actor_loss': -1.9648647762497276, 'time_step': 0.015806113719940184, 'td_error': 26860242.75250567, 'value_scale': 36858.70513447572, 'discounted_advantage': -39352.03718638867, 'initial_state': 36060.23828125, 'diff_eval': 6374.806797834954} step=54000
2025-12-06 10:31.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_54000.d3


Epoch 55/200: 100%|██████████| 1000/1000 [00:15<00:00, 64.05it/s, critic_loss=1.98e+7, actor_loss=-1.22]


2025-12-06 10:31.56 [info     ] CRR_20251206101312: epoch=55 step=55000 epoch=55 metrics={'time_sample_batch': 0.0019736993312835694, 'time_algorithm_update': 0.01313096284866333, 'critic_loss': 19792006.1275, 'actor_loss': -1.2265368131825234, 'time_step': 0.015338353633880615, 'td_error': 28238591.854119927, 'value_scale': 38426.18239491024, 'discounted_advantage': -41275.56366074563, 'initial_state': 37596.1328125, 'diff_eval': 5567.432161658331} step=55000
2025-12-06 10:31.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_55000.d3


Epoch 56/200: 100%|██████████| 1000/1000 [00:16<00:00, 62.46it/s, critic_loss=2.25e+7, actor_loss=-1.46]


2025-12-06 10:32.17 [info     ] CRR_20251206101312: epoch=56 step=56000 epoch=56 metrics={'time_sample_batch': 0.002019990921020508, 'time_algorithm_update': 0.013468775510787965, 'critic_loss': 22597062.976, 'actor_loss': -1.4604963856739923, 'time_step': 0.015730401754379272, 'td_error': 29786076.653606277, 'value_scale': 39689.4242841368, 'discounted_advantage': -42803.08198603373, 'initial_state': 37862.21875, 'diff_eval': 5423.305450406949} step=56000
2025-12-06 10:32.17 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_56000.d3


Epoch 57/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.52it/s, critic_loss=2.91e+7, actor_loss=-1.38]


2025-12-06 10:32.37 [info     ] CRR_20251206101312: epoch=57 step=57000 epoch=57 metrics={'time_sample_batch': 0.0020002994537353515, 'time_algorithm_update': 0.013204458475112915, 'critic_loss': 29065055.569, 'actor_loss': -1.389778650265187, 'time_step': 0.015443436622619629, 'td_error': 34043173.61047426, 'value_scale': 42405.71560466859, 'discounted_advantage': -48292.72056565604, 'initial_state': 39961.1484375, 'diff_eval': 5180.926136495282} step=57000
2025-12-06 10:32.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_57000.d3


Epoch 58/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.67it/s, critic_loss=3.61e+7, actor_loss=-1.37]


2025-12-06 10:32.58 [info     ] CRR_20251206101312: epoch=58 step=58000 epoch=58 metrics={'time_sample_batch': 0.002133978605270386, 'time_algorithm_update': 0.013527719974517823, 'critic_loss': 36134619.347, 'actor_loss': -1.3683073454853147, 'time_step': 0.015917508602142333, 'td_error': 41944167.329377905, 'value_scale': 45945.73072310685, 'discounted_advantage': -50892.55247352225, 'initial_state': 43004.38671875, 'diff_eval': 5320.1661757975235} step=58000
2025-12-06 10:32.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_58000.d3


Epoch 59/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.91it/s, critic_loss=5.74e+7, actor_loss=-1.8]


2025-12-06 10:33.18 [info     ] CRR_20251206101312: epoch=59 step=59000 epoch=59 metrics={'time_sample_batch': 0.001954737424850464, 'time_algorithm_update': 0.013396316528320313, 'critic_loss': 57338390.166, 'actor_loss': -1.8057333399818745, 'time_step': 0.015602603435516357, 'td_error': 53460836.987920664, 'value_scale': 50308.28122835247, 'discounted_advantage': -54736.595037292325, 'initial_state': 44702.5859375, 'diff_eval': 4649.952660097101} step=59000
2025-12-06 10:33.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_59000.d3


Epoch 60/200: 100%|██████████| 1000/1000 [00:15<00:00, 65.20it/s, critic_loss=8.53e+7, actor_loss=-2.25]


2025-12-06 10:33.38 [info     ] CRR_20251206101312: epoch=60 step=60000 epoch=60 metrics={'time_sample_batch': 0.0019048023223876952, 'time_algorithm_update': 0.012903570652008057, 'critic_loss': 85507303.448, 'actor_loss': -2.2494123116880655, 'time_step': 0.015048645496368408, 'td_error': 70036029.21847866, 'value_scale': 55750.62517034718, 'discounted_advantage': -63516.68803842153, 'initial_state': 48509.8203125, 'diff_eval': 4793.899910197221} step=60000
2025-12-06 10:33.38 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_60000.d3


Epoch 61/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.82it/s, critic_loss=1.52e+8, actor_loss=-2.02]


2025-12-06 10:33.58 [info     ] CRR_20251206101312: epoch=61 step=61000 epoch=61 metrics={'time_sample_batch': 0.0019585902690887453, 'time_algorithm_update': 0.01320758867263794, 'critic_loss': 152029208.174, 'actor_loss': -2.019659893065691, 'time_step': 0.015408101797103882, 'td_error': 95626134.12893057, 'value_scale': 62674.927791892835, 'discounted_advantage': -72052.4708515685, 'initial_state': 54676.35546875, 'diff_eval': 5818.984650729588} step=61000
2025-12-06 10:33.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_61000.d3


Epoch 62/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.77it/s, critic_loss=2.79e+8, actor_loss=-1.98]


2025-12-06 10:34.20 [info     ] CRR_20251206101312: epoch=62 step=62000 epoch=62 metrics={'time_sample_batch': 0.0019944720268249513, 'time_algorithm_update': 0.01447367787361145, 'critic_loss': 279315903.4, 'actor_loss': -1.9841891253152861, 'time_step': 0.016718759298324583, 'td_error': 126000271.82728688, 'value_scale': 69985.36963269136, 'discounted_advantage': -84479.90070553927, 'initial_state': 60435.578125, 'diff_eval': 5327.605128047737} step=62000
2025-12-06 10:34.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_62000.d3


Epoch 63/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.65it/s, critic_loss=3.23e+8, actor_loss=-2.41]


2025-12-06 10:34.41 [info     ] CRR_20251206101312: epoch=63 step=63000 epoch=63 metrics={'time_sample_batch': 0.002000924348831177, 'time_algorithm_update': 0.013181892395019532, 'critic_loss': 322284113.68, 'actor_loss': -2.4128421732361893, 'time_step': 0.015424485921859741, 'td_error': 142902211.0191173, 'value_scale': 78020.14189079202, 'discounted_advantage': -88184.93212514627, 'initial_state': 68006.8125, 'diff_eval': 3695.1998513919925} step=63000
2025-12-06 10:34.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_63000.d3


Epoch 64/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.10it/s, critic_loss=2.9e+8, actor_loss=-2.54]


2025-12-06 10:35.01 [info     ] CRR_20251206101312: epoch=64 step=64000 epoch=64 metrics={'time_sample_batch': 0.002048105239868164, 'time_algorithm_update': 0.013260683298110962, 'critic_loss': 289973602.192, 'actor_loss': -2.5400188170270996, 'time_step': 0.015549500942230225, 'td_error': 165713128.24330628, 'value_scale': 85186.64449431171, 'discounted_advantage': -92349.19278411733, 'initial_state': 75275.0625, 'diff_eval': 3459.2863326888323} step=64000
2025-12-06 10:35.01 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_64000.d3


Epoch 65/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.75it/s, critic_loss=2.88e+8, actor_loss=-2.52]


2025-12-06 10:35.21 [info     ] CRR_20251206101312: epoch=65 step=65000 epoch=65 metrics={'time_sample_batch': 0.0020226728916168213, 'time_algorithm_update': 0.013138876676559448, 'critic_loss': 288639294.472, 'actor_loss': -2.5161182510994404, 'time_step': 0.015396192073822021, 'td_error': 195355586.20978048, 'value_scale': 92906.21383904891, 'discounted_advantage': -103234.8953428587, 'initial_state': 82495.625, 'diff_eval': 4072.7836570119025} step=65000
2025-12-06 10:35.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_65000.d3


Epoch 66/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.47it/s, critic_loss=3.68e+8, actor_loss=-2.13]


2025-12-06 10:35.42 [info     ] CRR_20251206101312: epoch=66 step=66000 epoch=66 metrics={'time_sample_batch': 0.0020189635753631594, 'time_algorithm_update': 0.013216808557510376, 'critic_loss': 369472117.888, 'actor_loss': -2.124534218996763, 'time_step': 0.015468856573104858, 'td_error': 237610707.82392406, 'value_scale': 101419.8543609082, 'discounted_advantage': -116353.0014269332, 'initial_state': 90833.859375, 'diff_eval': 4696.441685998691} step=66000
2025-12-06 10:35.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_66000.d3


Epoch 67/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.86it/s, critic_loss=5.69e+8, actor_loss=-1.83]


2025-12-06 10:36.02 [info     ] CRR_20251206101312: epoch=67 step=67000 epoch=67 metrics={'time_sample_batch': 0.0020578999519348146, 'time_algorithm_update': 0.01330335545539856, 'critic_loss': 568821150.544, 'actor_loss': -1.8264767801016568, 'time_step': 0.01560825228691101, 'td_error': 298006421.9320403, 'value_scale': 112687.79839376752, 'discounted_advantage': -127055.14116238209, 'initial_state': 102862.0625, 'diff_eval': 4973.351816030337} step=67000
2025-12-06 10:36.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_67000.d3


Epoch 68/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.69it/s, critic_loss=8.45e+8, actor_loss=-1.84]


2025-12-06 10:36.22 [info     ] CRR_20251206101312: epoch=68 step=68000 epoch=68 metrics={'time_sample_batch': 0.0019663379192352295, 'time_algorithm_update': 0.013206039428710938, 'critic_loss': 844602978.144, 'actor_loss': -1.8459819159135222, 'time_step': 0.01541468858718872, 'td_error': 351204818.1718326, 'value_scale': 120873.2526671289, 'discounted_advantage': -139986.42598279277, 'initial_state': 108597.2578125, 'diff_eval': 4414.713189377075} step=68000
2025-12-06 10:36.23 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_68000.d3


Epoch 69/200: 100%|██████████| 1000/1000 [00:15<00:00, 64.26it/s, critic_loss=9.51e+8, actor_loss=-1.95]


2025-12-06 10:36.43 [info     ] CRR_20251206101312: epoch=69 step=69000 epoch=69 metrics={'time_sample_batch': 0.0019010255336761475, 'time_algorithm_update': 0.013133048057556152, 'critic_loss': 951168438.896, 'actor_loss': -1.9492736974661238, 'time_step': 0.01527323317527771, 'td_error': 418344421.0005343, 'value_scale': 131530.31102164972, 'discounted_advantage': -148562.54478645435, 'initial_state': 118686.96875, 'diff_eval': 4497.884067618042} step=69000
2025-12-06 10:36.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_69000.d3


Epoch 70/200: 100%|██████████| 1000/1000 [00:15<00:00, 64.42it/s, critic_loss=1.3e+9, actor_loss=-1.77]


2025-12-06 10:37.03 [info     ] CRR_20251206101312: epoch=70 step=70000 epoch=70 metrics={'time_sample_batch': 0.001981947660446167, 'time_algorithm_update': 0.012990731000900268, 'critic_loss': 1302682195.296, 'actor_loss': -1.7757235465216217, 'time_step': 0.015225931406021118, 'td_error': 536256378.744229, 'value_scale': 146491.61580492326, 'discounted_advantage': -166951.84070968043, 'initial_state': 134693.125, 'diff_eval': 5456.488486091558} step=70000
2025-12-06 10:37.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_70000.d3


Epoch 71/200: 100%|██████████| 1000/1000 [00:15<00:00, 64.98it/s, critic_loss=1.95e+9, actor_loss=-1.58]


2025-12-06 10:37.23 [info     ] CRR_20251206101312: epoch=71 step=71000 epoch=71 metrics={'time_sample_batch': 0.001892810583114624, 'time_algorithm_update': 0.012938574314117431, 'critic_loss': 1955765320.992, 'actor_loss': -1.5818359333183616, 'time_step': 0.015080261945724488, 'td_error': 671913605.2355398, 'value_scale': 161379.6167110881, 'discounted_advantage': -185359.60013703012, 'initial_state': 150155.453125, 'diff_eval': 5841.923156798987} step=71000
2025-12-06 10:37.23 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_71000.d3


Epoch 72/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.06it/s, critic_loss=2.82e+9, actor_loss=-1.53]


2025-12-06 10:37.43 [info     ] CRR_20251206101312: epoch=72 step=72000 epoch=72 metrics={'time_sample_batch': 0.002032167911529541, 'time_algorithm_update': 0.01325761866569519, 'critic_loss': 2818779894.144, 'actor_loss': -1.5244111890858039, 'time_step': 0.01555115008354187, 'td_error': 868890309.4743797, 'value_scale': 178534.530491179, 'discounted_advantage': -218772.14619784465, 'initial_state': 169316.546875, 'diff_eval': 5345.268714729945} step=72000
2025-12-06 10:37.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_72000.d3


Epoch 73/200: 100%|██████████| 1000/1000 [00:15<00:00, 64.05it/s, critic_loss=3.01e+9, actor_loss=-1.72]  


2025-12-06 10:38.03 [info     ] CRR_20251206101312: epoch=73 step=73000 epoch=73 metrics={'time_sample_batch': 0.0020026798248291016, 'time_algorithm_update': 0.01306895089149475, 'critic_loss': 3013038724.096, 'actor_loss': -1.7278822378963232, 'time_step': 0.015323695421218872, 'td_error': 924407981.5508827, 'value_scale': 185728.1232940853, 'discounted_advantage': -218748.05353734153, 'initial_state': 166711.09375, 'diff_eval': 4283.286960858664} step=73000
2025-12-06 10:38.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_73000.d3


Epoch 74/200: 100%|██████████| 1000/1000 [00:15<00:00, 64.69it/s, critic_loss=3.48e+9, actor_loss=-1.48]


2025-12-06 10:38.23 [info     ] CRR_20251206101312: epoch=74 step=74000 epoch=74 metrics={'time_sample_batch': 0.0019410769939422607, 'time_algorithm_update': 0.013003800630569457, 'critic_loss': 3476991206.656, 'actor_loss': -1.4831666017179377, 'time_step': 0.015184624195098877, 'td_error': 1205301893.680603, 'value_scale': 213328.5677192477, 'discounted_advantage': -248580.102373345, 'initial_state': 194171.9375, 'diff_eval': 7407.540508165141} step=74000
2025-12-06 10:38.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_74000.d3


Epoch 75/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.45it/s, critic_loss=4.25e+9, actor_loss=-0.78]


2025-12-06 10:38.44 [info     ] CRR_20251206101312: epoch=75 step=75000 epoch=75 metrics={'time_sample_batch': 0.002031919240951538, 'time_algorithm_update': 0.013199997663497925, 'critic_loss': 4257273729.792, 'actor_loss': -0.7773472618531377, 'time_step': 0.015466967344284058, 'td_error': 1967879289.9289303, 'value_scale': 264727.67606938916, 'discounted_advantage': -343811.0330917996, 'initial_state': 258340.125, 'diff_eval': 12837.663729208794} step=75000
2025-12-06 10:38.44 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_75000.d3


Epoch 76/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.33it/s, critic_loss=7.81e+9, actor_loss=-0.27]


2025-12-06 10:39.05 [info     ] CRR_20251206101312: epoch=76 step=76000 epoch=76 metrics={'time_sample_batch': 0.0021098976135253905, 'time_algorithm_update': 0.013637754678726197, 'critic_loss': 7838624446.464, 'actor_loss': -0.26772311520485725, 'time_step': 0.01599966859817505, 'td_error': 4036419158.3779306, 'value_scale': 376645.9635307523, 'discounted_advantage': -496019.093565377, 'initial_state': 406663.0, 'diff_eval': 25661.236007598058} step=76000
2025-12-06 10:39.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_76000.d3


Epoch 77/200: 100%|██████████| 1000/1000 [00:16<00:00, 62.29it/s, critic_loss=1.67e+10, actor_loss=-0.0557]


2025-12-06 10:39.25 [info     ] CRR_20251206101312: epoch=77 step=77000 epoch=77 metrics={'time_sample_batch': 0.0020356595516204832, 'time_algorithm_update': 0.013496555089950561, 'critic_loss': 16701130098.688, 'actor_loss': -0.055429516544172136, 'time_step': 0.01575851559638977, 'td_error': 9368511222.552402, 'value_scale': 539652.8808675607, 'discounted_advantage': -804027.8463844738, 'initial_state': 602374.375, 'diff_eval': 48915.45320117244} step=77000
2025-12-06 10:39.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_77000.d3


Epoch 78/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.71it/s, critic_loss=2.97e+10, actor_loss=-0.0561]


2025-12-06 10:39.46 [info     ] CRR_20251206101312: epoch=78 step=78000 epoch=78 metrics={'time_sample_batch': 0.0020819084644317625, 'time_algorithm_update': 0.013831702947616577, 'critic_loss': 29792865230.848, 'actor_loss': -0.055254441694846605, 'time_step': 0.01616389799118042, 'td_error': 17768232439.21518, 'value_scale': 755204.3675738684, 'discounted_advantage': -1074239.0803727014, 'initial_state': 872383.6875, 'diff_eval': 61138.593285457486} step=78000
2025-12-06 10:39.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_78000.d3


Epoch 79/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.84it/s, critic_loss=5.28e+10, actor_loss=0.0953]


2025-12-06 10:40.07 [info     ] CRR_20251206101312: epoch=79 step=79000 epoch=79 metrics={'time_sample_batch': 0.0020573849678039552, 'time_algorithm_update': 0.01358070158958435, 'critic_loss': 52827823072.256, 'actor_loss': 0.09545671917306027, 'time_step': 0.015884994506835937, 'td_error': 30511277802.86088, 'value_scale': 996721.3153290026, 'discounted_advantage': -1336433.2560447734, 'initial_state': 1191004.0, 'diff_eval': 63176.529247965016} step=79000
2025-12-06 10:40.07 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_79000.d3


Epoch 80/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.57it/s, critic_loss=8.05e+10, actor_loss=0.0807]


2025-12-06 10:40.27 [info     ] CRR_20251206101312: epoch=80 step=80000 epoch=80 metrics={'time_sample_batch': 0.00199692964553833, 'time_algorithm_update': 0.013211854457855225, 'critic_loss': 80615007893.504, 'actor_loss': 0.07999364314539051, 'time_step': 0.015449410438537597, 'td_error': 46211996002.27038, 'value_scale': 1213209.6373899833, 'discounted_advantage': -1627545.6911137884, 'initial_state': 1466847.875, 'diff_eval': 64624.01691915123} step=80000
2025-12-06 10:40.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_80000.d3


Epoch 81/200: 100%|██████████| 1000/1000 [00:15<00:00, 64.62it/s, critic_loss=1.16e+11, actor_loss=0.0139] 


2025-12-06 10:40.47 [info     ] CRR_20251206101312: epoch=81 step=81000 epoch=81 metrics={'time_sample_batch': 0.0019854104518890383, 'time_algorithm_update': 0.012986845970153808, 'critic_loss': 116152716824.576, 'actor_loss': 0.012965507497428917, 'time_step': 0.0151960608959198, 'td_error': 70949535830.82115, 'value_scale': 1476621.4654756915, 'discounted_advantage': -2055507.969665997, 'initial_state': 1800850.25, 'diff_eval': 68528.02485594066} step=81000
2025-12-06 10:40.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_81000.d3


Epoch 82/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.66it/s, critic_loss=1.7e+11, actor_loss=-0.0384]


2025-12-06 10:41.08 [info     ] CRR_20251206101312: epoch=82 step=82000 epoch=82 metrics={'time_sample_batch': 0.001984933376312256, 'time_algorithm_update': 0.013210643529891968, 'critic_loss': 170229877874.688, 'actor_loss': -0.03810117209237069, 'time_step': 0.015421619892120361, 'td_error': 115171833574.43457, 'value_scale': 1849369.9131391451, 'discounted_advantage': -2494000.1061633774, 'initial_state': 2263080.75, 'diff_eval': 83812.67890712741} step=82000
2025-12-06 10:41.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_82000.d3


Epoch 83/200: 100%|██████████| 1000/1000 [00:15<00:00, 64.78it/s, critic_loss=2.7e+11, actor_loss=-0.0053] 


2025-12-06 10:41.28 [info     ] CRR_20251206101312: epoch=83 step=83000 epoch=83 metrics={'time_sample_batch': 0.0019308719635009765, 'time_algorithm_update': 0.012977052211761474, 'critic_loss': 270363233452.032, 'actor_loss': -0.005251213259994983, 'time_step': 0.015145049810409546, 'td_error': 180101891792.69678, 'value_scale': 2276586.829421626, 'discounted_advantage': -3114225.7647852656, 'initial_state': 2810077.5, 'diff_eval': 90603.20844631403} step=83000
2025-12-06 10:41.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_83000.d3


Epoch 84/200: 100%|██████████| 1000/1000 [00:15<00:00, 64.24it/s, critic_loss=3.83e+11, actor_loss=-0.0028]


2025-12-06 10:41.48 [info     ] CRR_20251206101312: epoch=84 step=84000 epoch=84 metrics={'time_sample_batch': 0.0019433538913726806, 'time_algorithm_update': 0.013100398063659669, 'critic_loss': 382962033819.648, 'actor_loss': -0.0027789398431777956, 'time_step': 0.015281036376953125, 'td_error': 254299030937.28268, 'value_scale': 2682709.3346081306, 'discounted_advantage': -3625471.1677371264, 'initial_state': 3319521.5, 'diff_eval': 93861.33538957774} step=84000
2025-12-06 10:41.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_84000.d3


Epoch 85/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.49it/s, critic_loss=5.13e+11, actor_loss=-0.00137]


2025-12-06 10:42.08 [info     ] CRR_20251206101312: epoch=85 step=85000 epoch=85 metrics={'time_sample_batch': 0.002007957458496094, 'time_algorithm_update': 0.013226675987243652, 'critic_loss': 513376755761.152, 'actor_loss': -0.0013533462584018707, 'time_step': 0.015471718549728394, 'td_error': 353977884355.9549, 'value_scale': 3118160.5379295894, 'discounted_advantage': -4294117.467787183, 'initial_state': 3857853.0, 'diff_eval': 103087.65932764532} step=85000
2025-12-06 10:42.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_85000.d3


Epoch 86/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.64it/s, critic_loss=6.74e+11, actor_loss=-0.00491]


2025-12-06 10:42.29 [info     ] CRR_20251206101312: epoch=86 step=86000 epoch=86 metrics={'time_sample_batch': 0.0019840636253356933, 'time_algorithm_update': 0.013444717407226563, 'critic_loss': 674341437751.296, 'actor_loss': -0.004868556052446365, 'time_step': 0.015670983791351317, 'td_error': 474246866168.6389, 'value_scale': 3586184.5587279964, 'discounted_advantage': -4811748.562669214, 'initial_state': 4440932.5, 'diff_eval': 105870.82091712153} step=86000
2025-12-06 10:42.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_86000.d3


Epoch 87/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.91it/s, critic_loss=8.67e+11, actor_loss=-0.00109]


2025-12-06 10:42.50 [info     ] CRR_20251206101312: epoch=87 step=87000 epoch=87 metrics={'time_sample_batch': 0.0019976413249969483, 'time_algorithm_update': 0.013627881526947022, 'critic_loss': 868035440082.944, 'actor_loss': -0.0010843844190239907, 'time_step': 0.015862310886383058, 'td_error': 620386860070.8103, 'value_scale': 4083672.6554379715, 'discounted_advantage': -5422205.352564315, 'initial_state': 5065704.5, 'diff_eval': 106449.70710149415} step=87000
2025-12-06 10:42.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_87000.d3


Epoch 88/200: 100%|██████████| 1000/1000 [00:15<00:00, 64.48it/s, critic_loss=1.11e+12, actor_loss=-0.00113]


2025-12-06 10:43.10 [info     ] CRR_20251206101312: epoch=88 step=88000 epoch=88 metrics={'time_sample_batch': 0.0019685778617858887, 'time_algorithm_update': 0.013024458169937133, 'critic_loss': 1112761324339.2, 'actor_loss': -0.001122727371752262, 'time_step': 0.015232754468917847, 'td_error': 805567883601.254, 'value_scale': 4621210.261735122, 'discounted_advantage': -6162195.110389615, 'initial_state': 5737129.0, 'diff_eval': 107815.08496216536} step=88000
2025-12-06 10:43.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_88000.d3


Epoch 89/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.92it/s, critic_loss=1.4e+12, actor_loss=-0.00144] 


2025-12-06 10:43.30 [info     ] CRR_20251206101312: epoch=89 step=89000 epoch=89 metrics={'time_sample_batch': 0.001960323333740234, 'time_algorithm_update': 0.013404006004333497, 'critic_loss': 1399428764303.36, 'actor_loss': -0.0014262112379074096, 'time_step': 0.015600850582122803, 'td_error': 1035171015767.3503, 'value_scale': 5212850.126781223, 'discounted_advantage': -6839855.087989357, 'initial_state': 6474015.5, 'diff_eval': 110313.22470903676} step=89000
2025-12-06 10:43.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_89000.d3


Epoch 90/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.46it/s, critic_loss=1.76e+12, actor_loss=-0.000994]


2025-12-06 10:43.50 [info     ] CRR_20251206101312: epoch=90 step=90000 epoch=90 metrics={'time_sample_batch': 0.002007019519805908, 'time_algorithm_update': 0.013229440450668335, 'critic_loss': 1764303396110.336, 'actor_loss': -0.000984609991312027, 'time_step': 0.01546963381767273, 'td_error': 1314963242359.1597, 'value_scale': 5844671.696458508, 'discounted_advantage': -7738266.1138114, 'initial_state': 7267263.5, 'diff_eval': 111038.00065568027} step=90000
2025-12-06 10:43.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_90000.d3


Epoch 91/200: 100%|██████████| 1000/1000 [00:16<00:00, 62.39it/s, critic_loss=2.19e+12, actor_loss=-0.000193]


2025-12-06 10:44.11 [info     ] CRR_20251206101312: epoch=91 step=91000 epoch=91 metrics={'time_sample_batch': 0.00201471471786499, 'time_algorithm_update': 0.013439296245574951, 'critic_loss': 2196544785154.048, 'actor_loss': -0.00019081062078475953, 'time_step': 0.015723204374313356, 'td_error': 1651112089057.9119, 'value_scale': 6543000.786567477, 'discounted_advantage': -8456925.241369063, 'initial_state': 8143735.0, 'diff_eval': 111037.21092992816} step=91000
2025-12-06 10:44.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_91000.d3


Epoch 92/200: 100%|██████████| 1000/1000 [00:16<00:00, 62.30it/s, critic_loss=2.7e+12, actor_loss=-0.00103] 


2025-12-06 10:44.32 [info     ] CRR_20251206101312: epoch=92 step=92000 epoch=92 metrics={'time_sample_batch': 0.0020761897563934327, 'time_algorithm_update': 0.013446885585784912, 'critic_loss': 2699738968686.592, 'actor_loss': -0.0010242484137415887, 'time_step': 0.015771647930145263, 'td_error': 2044969477826.9387, 'value_scale': 7237306.0976529755, 'discounted_advantage': -9650000.895803194, 'initial_state': 9020856.0, 'diff_eval': 111036.26314430205} step=92000
2025-12-06 10:44.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_92000.d3


Epoch 93/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.69it/s, critic_loss=3.25e+12, actor_loss=-0.00123]


2025-12-06 10:44.52 [info     ] CRR_20251206101312: epoch=93 step=93000 epoch=93 metrics={'time_sample_batch': 0.0023828747272491456, 'time_algorithm_update': 0.013055623769760131, 'critic_loss': 3252072497807.36, 'actor_loss': -0.001215059034526348, 'time_step': 0.01567265748977661, 'td_error': 2504999353602.1265, 'value_scale': 8012846.243503772, 'discounted_advantage': -10253333.79881482, 'initial_state': 9992574.0, 'diff_eval': 111039.32662802709} step=93000
2025-12-06 10:44.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_93000.d3


Epoch 94/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.04it/s, critic_loss=4.01e+12, actor_loss=-0.00437]


2025-12-06 10:45.13 [info     ] CRR_20251206101312: epoch=94 step=94000 epoch=94 metrics={'time_sample_batch': 0.0019977288246154786, 'time_algorithm_update': 0.013302287340164184, 'critic_loss': 4010938956644.352, 'actor_loss': -0.004334814833477139, 'time_step': 0.015562732219696045, 'td_error': 3066027053620.8374, 'value_scale': 8819420.58507963, 'discounted_advantage': -11619480.272480432, 'initial_state': 11003712.0, 'diff_eval': 111039.06681635714} step=94000
2025-12-06 10:45.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_94000.d3


Epoch 95/200: 100%|██████████| 1000/1000 [00:15<00:00, 64.40it/s, critic_loss=4.77e+12, actor_loss=-0.0017] 


2025-12-06 10:45.33 [info     ] CRR_20251206101312: epoch=95 step=95000 epoch=95 metrics={'time_sample_batch': 0.0019456751346588134, 'time_algorithm_update': 0.013052988767623902, 'critic_loss': 4772873959243.776, 'actor_loss': -0.0016799811646342278, 'time_step': 0.01524726891517639, 'td_error': 3744133043959.2637, 'value_scale': 9739139.837803856, 'discounted_advantage': -12570141.277688082, 'initial_state': 12164989.0, 'diff_eval': 111040.17024930584} step=95000
2025-12-06 10:45.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_95000.d3


Epoch 96/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.20it/s, critic_loss=5.83e+12, actor_loss=-0.00168]


2025-12-06 10:45.53 [info     ] CRR_20251206101312: epoch=96 step=96000 epoch=96 metrics={'time_sample_batch': 0.002025838613510132, 'time_algorithm_update': 0.013235554695129395, 'critic_loss': 5836324851286.016, 'actor_loss': -0.0016635407209396361, 'time_step': 0.01551001763343811, 'td_error': 4521448771957.248, 'value_scale': 10655557.973805532, 'discounted_advantage': -14052898.327423297, 'initial_state': 13321132.0, 'diff_eval': 111036.62323228271} step=96000
2025-12-06 10:45.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_96000.d3


Epoch 97/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.13it/s, critic_loss=6.79e+12, actor_loss=-0.00182]


2025-12-06 10:46.14 [info     ] CRR_20251206101312: epoch=97 step=97000 epoch=97 metrics={'time_sample_batch': 0.0020146989822387696, 'time_algorithm_update': 0.013291213512420654, 'critic_loss': 6797962831462.4, 'actor_loss': -0.0018005964756011962, 'time_step': 0.015550500392913818, 'td_error': 5400746584281.11, 'value_scale': 11643732.681894384, 'discounted_advantage': -14991468.08401841, 'initial_state': 14566036.0, 'diff_eval': 111040.45554737434} step=97000
2025-12-06 10:46.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_97000.d3


Epoch 98/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.60it/s, critic_loss=8.08e+12, actor_loss=-0.00077]


2025-12-06 10:46.34 [info     ] CRR_20251206101312: epoch=98 step=98000 epoch=98 metrics={'time_sample_batch': 0.0020039656162261963, 'time_algorithm_update': 0.01340875744819641, 'critic_loss': 8083823244083.2, 'actor_loss': -0.0007632424831390381, 'time_step': 0.01567457389831543, 'td_error': 6441355310022.3955, 'value_scale': 12681963.112950545, 'discounted_advantage': -16318677.315741597, 'initial_state': 15876643.0, 'diff_eval': 111035.90894843946} step=98000
2025-12-06 10:46.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_98000.d3


Epoch 99/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.98it/s, critic_loss=9.41e+12, actor_loss=-0.00162]


2025-12-06 10:46.55 [info     ] CRR_20251206101312: epoch=99 step=99000 epoch=99 metrics={'time_sample_batch': 0.0020036399364471436, 'time_algorithm_update': 0.013344046354293823, 'critic_loss': 9417655570399.232, 'actor_loss': -0.0016007210612297058, 'time_step': 0.015593056440353394, 'td_error': 7615754058566.468, 'value_scale': 13751887.99853311, 'discounted_advantage': -17880273.584441744, 'initial_state': 17218712.0, 'diff_eval': 111039.87095929871} step=99000
2025-12-06 10:46.55 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_99000.d3


Epoch 100/200: 100%|██████████| 1000/1000 [00:16<00:00, 62.35it/s, critic_loss=1.12e+13, actor_loss=-0.000385]


2025-12-06 10:47.15 [info     ] CRR_20251206101312: epoch=100 step=100000 epoch=100 metrics={'time_sample_batch': 0.0020789000988006593, 'time_algorithm_update': 0.013388254165649415, 'critic_loss': 11236505123028.992, 'actor_loss': -0.00038162124156951907, 'time_step': 0.01573182249069214, 'td_error': 9024567171537.69, 'value_scale': 14938656.221290864, 'discounted_advantage': -19420419.422001798, 'initial_state': 18715022.0, 'diff_eval': 111037.38847746294} step=100000
2025-12-06 10:47.15 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_100000.d3


Epoch 101/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.87it/s, critic_loss=1.29e+13, actor_loss=-0.000578]


2025-12-06 10:47.36 [info     ] CRR_20251206101312: epoch=101 step=101000 epoch=101 metrics={'time_sample_batch': 0.0020168893337249756, 'time_algorithm_update': 0.013100791215896607, 'critic_loss': 12899770457653.248, 'actor_loss': -0.0005724318623542786, 'time_step': 0.01535992693901062, 'td_error': 10634931448730.088, 'value_scale': 16204170.68210394, 'discounted_advantage': -20654508.242251366, 'initial_state': 20319608.0, 'diff_eval': 111035.3777291361} step=101000
2025-12-06 10:47.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_101000.d3


Epoch 102/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.56it/s, critic_loss=1.52e+13, actor_loss=-0.00181]


2025-12-06 10:47.56 [info     ] CRR_20251206101312: epoch=102 step=102000 epoch=102 metrics={'time_sample_batch': 0.002023146152496338, 'time_algorithm_update': 0.013186277627944946, 'critic_loss': 15185312281526.271, 'actor_loss': -0.0017915316820144654, 'time_step': 0.01544989275932312, 'td_error': 12462095026068.115, 'value_scale': 17485946.702849958, 'discounted_advantage': -22762835.484176323, 'initial_state': 21944582.0, 'diff_eval': 111040.32811026562} step=102000
2025-12-06 10:47.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_102000.d3


Epoch 103/200: 100%|██████████| 1000/1000 [00:15<00:00, 64.49it/s, critic_loss=1.78e+13, actor_loss=-0.00222]


2025-12-06 10:48.16 [info     ] CRR_20251206101312: epoch=103 step=103000 epoch=103 metrics={'time_sample_batch': 0.0019600927829742433, 'time_algorithm_update': 0.013031583070755005, 'critic_loss': 17818990473117.695, 'actor_loss': -0.002198976332321763, 'time_step': 0.015230541706085204, 'td_error': 14559809767507.668, 'value_scale': 18877248.980092205, 'discounted_advantage': -24392380.680924263, 'initial_state': 23699036.0, 'diff_eval': 111035.07165865006} step=103000
2025-12-06 10:48.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_103000.d3


Epoch 104/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.86it/s, critic_loss=2.04e+13, actor_loss=-0.000578]


2025-12-06 10:48.36 [info     ] CRR_20251206101312: epoch=104 step=104000 epoch=104 metrics={'time_sample_batch': 0.0019772405624389647, 'time_algorithm_update': 0.013175854206085205, 'critic_loss': 20382725444730.88, 'actor_loss': -0.0005724318623542786, 'time_step': 0.015387247562408448, 'td_error': 16948301232017.854, 'value_scale': 20354358.203059513, 'discounted_advantage': -25862938.963579427, 'initial_state': 25569192.0, 'diff_eval': 111038.36797415261} step=104000
2025-12-06 10:48.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_104000.d3


Epoch 105/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.26it/s, critic_loss=2.39e+13, actor_loss=-0.000604]


2025-12-06 10:48.57 [info     ] CRR_20251206101312: epoch=105 step=105000 epoch=105 metrics={'time_sample_batch': 0.0020405187606811525, 'time_algorithm_update': 0.013243719100952149, 'critic_loss': 23878041115557.887, 'actor_loss': -0.0005982552710920572, 'time_step': 0.015532198429107667, 'td_error': 19645881384691.688, 'value_scale': 21854620.04379715, 'discounted_advantage': -28211811.806256942, 'initial_state': 27469292.0, 'diff_eval': 111041.98400991461} step=105000
2025-12-06 10:48.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_105000.d3


Epoch 106/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.61it/s, critic_loss=2.74e+13, actor_loss=-0.000604]


2025-12-06 10:49.17 [info     ] CRR_20251206101312: epoch=106 step=106000 epoch=106 metrics={'time_sample_batch': 0.0020401370525360107, 'time_algorithm_update': 0.013399738311767578, 'critic_loss': 27396864131203.07, 'actor_loss': -0.0020081657115370035, 'time_step': 0.01568342852592468, 'td_error': 22716720363784.77, 'value_scale': 23478102.25943001, 'discounted_advantage': -30145370.41304905, 'initial_state': 29529114.0, 'diff_eval': 111037.05200543208} step=106000
2025-12-06 10:49.17 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_106000.d3


Epoch 107/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.93it/s, critic_loss=3.12e+13, actor_loss=-0.00222]


2025-12-06 10:49.37 [info     ] CRR_20251206101312: epoch=107 step=107000 epoch=107 metrics={'time_sample_batch': 0.002035996437072754, 'time_algorithm_update': 0.013321048021316529, 'critic_loss': 31269446945865.727, 'actor_loss': -0.002198976332321763, 'time_step': 0.015602741956710815, 'td_error': 26272844858393.22, 'value_scale': 25217550.645012572, 'discounted_advantage': -32190669.529815584, 'initial_state': 31747110.0, 'diff_eval': 111035.28110736294} step=107000
2025-12-06 10:49.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_107000.d3


Epoch 108/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.61it/s, critic_loss=3.62e+13, actor_loss=-0.00866]


2025-12-06 10:49.58 [info     ] CRR_20251206101312: epoch=108 step=108000 epoch=108 metrics={'time_sample_batch': 0.0020126757621765135, 'time_algorithm_update': 0.013176331043243408, 'critic_loss': 36159203702734.85, 'actor_loss': -0.008586477935314179, 'time_step': 0.015428828954696655, 'td_error': 30294827191436.203, 'value_scale': 27048436.440067057, 'discounted_advantage': -34400375.94607853, 'initial_state': 34071700.0, 'diff_eval': 111034.55147744984} step=108000
2025-12-06 10:49.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_108000.d3


Epoch 109/200: 100%|██████████| 1000/1000 [00:16<00:00, 62.23it/s, critic_loss=4.09e+13, actor_loss=-0.00424]


2025-12-06 10:50.18 [info     ] CRR_20251206101312: epoch=109 step=109000 epoch=109 metrics={'time_sample_batch': 0.0019910712242126464, 'time_algorithm_update': 0.01348653817176819, 'critic_loss': 40964381870129.15, 'actor_loss': -0.004201328475726769, 'time_step': 0.015742920875549318, 'td_error': 34633268940448.508, 'value_scale': 28879668.527661357, 'discounted_advantage': -36896452.0424037, 'initial_state': 36395436.0, 'diff_eval': 111037.12339129132} step=109000
2025-12-06 10:50.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_109000.d3


Epoch 110/200: 100%|██████████| 1000/1000 [00:15<00:00, 65.45it/s, critic_loss=4.66e+13, actor_loss=-0.00482]


2025-12-06 10:50.38 [info     ] CRR_20251206101312: epoch=110 step=110000 epoch=110 metrics={'time_sample_batch': 0.0019426109790802002, 'time_algorithm_update': 0.01282329559326172, 'critic_loss': 46599136414269.44, 'actor_loss': -0.004773760338081047, 'time_step': 0.01499504780769348, 'td_error': 39418947186366.164, 'value_scale': 30771777.233445097, 'discounted_advantage': -39454760.279799946, 'initial_state': 38798800.0, 'diff_eval': 111035.89993385896} step=110000
2025-12-06 10:50.38 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_110000.d3


Epoch 111/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.31it/s, critic_loss=5.22e+13, actor_loss=-0.00386]


2025-12-06 10:50.58 [info     ] CRR_20251206101312: epoch=111 step=111000 epoch=111 metrics={'time_sample_batch': 0.0020010833740234375, 'time_algorithm_update': 0.013240155935287476, 'critic_loss': 52177185508687.875, 'actor_loss': -0.003823202052619308, 'time_step': 0.015498388528823853, 'td_error': 44767881318283.65, 'value_scale': 32786259.07879296, 'discounted_advantage': -41475647.20066055, 'initial_state': 41381044.0, 'diff_eval': 111036.6422938741} step=111000
2025-12-06 10:50.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_111000.d3


Epoch 112/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.53it/s, critic_loss=5.87e+13, actor_loss=-0.000578]


2025-12-06 10:51.19 [info     ] CRR_20251206101312: epoch=112 step=112000 epoch=112 metrics={'time_sample_batch': 0.0020398383140563963, 'time_algorithm_update': 0.013411515951156616, 'critic_loss': 58674780810772.48, 'actor_loss': -0.0005724318623542786, 'time_step': 0.015694467782974245, 'td_error': 50539895439674.75, 'value_scale': 34769400.8868399, 'discounted_advantage': -44423968.7564393, 'initial_state': 43882168.0, 'diff_eval': 111037.10971880239} step=112000
2025-12-06 10:51.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_112000.d3


Epoch 113/200: 100%|██████████| 1000/1000 [00:16<00:00, 62.32it/s, critic_loss=6.74e+13, actor_loss=-0.00443]


2025-12-06 10:51.39 [info     ] CRR_20251206101312: epoch=113 step=113000 epoch=113 metrics={'time_sample_batch': 0.002048817157745361, 'time_algorithm_update': 0.013447293043136598, 'critic_loss': 67529483424366.59, 'actor_loss': -0.004388644278049469, 'time_step': 0.015745292663574217, 'td_error': 57118353699309.9, 'value_scale': 36939386.781223804, 'discounted_advantage': -46930180.45300096, 'initial_state': 46656204.0, 'diff_eval': 111039.77621963898} step=113000
2025-12-06 10:51.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_113000.d3


Epoch 114/200: 100%|██████████| 1000/1000 [00:16<00:00, 62.44it/s, critic_loss=7.64e+13, actor_loss=-0.00077]


2025-12-06 10:52.00 [info     ] CRR_20251206101312: epoch=114 step=114000 epoch=114 metrics={'time_sample_batch': 0.001936204433441162, 'time_algorithm_update': 0.01355844783782959, 'critic_loss': 76421652455358.47, 'actor_loss': -0.0009540531039237976, 'time_step': 0.015733179330825804, 'td_error': 64382244160393.3, 'value_scale': 39146005.82020117, 'discounted_advantage': -50258265.43468576, 'initial_state': 49445540.0, 'diff_eval': 111038.45445702961} step=114000
2025-12-06 10:52.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_114000.d3


Epoch 115/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.92it/s, critic_loss=8.51e+13, actor_loss=-0.00809]


2025-12-06 10:52.20 [info     ] CRR_20251206101312: epoch=115 step=115000 epoch=115 metrics={'time_sample_batch': 0.0020006778240203855, 'time_algorithm_update': 0.013328810453414917, 'critic_loss': 85238455073767.42, 'actor_loss': -0.0080140460729599, 'time_step': 0.01559308671951294, 'td_error': 72800638715291.88, 'value_scale': 41605253.3906119, 'discounted_advantage': -53024509.193656884, 'initial_state': 52596420.0, 'diff_eval': 111037.17765374515} step=115000
2025-12-06 10:52.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_115000.d3


Epoch 116/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.01it/s, critic_loss=9.76e+13, actor_loss=-0.00404]


2025-12-06 10:52.41 [info     ] CRR_20251206101312: epoch=116 step=116000 epoch=116 metrics={'time_sample_batch': 0.00205105447769165, 'time_algorithm_update': 0.013281282424926757, 'critic_loss': 97727840205668.36, 'actor_loss': -0.00400702303647995, 'time_step': 0.015578964948654175, 'td_error': 81952939218855.16, 'value_scale': 44024512.28080469, 'discounted_advantage': -57387005.00673599, 'initial_state': 55689852.0, 'diff_eval': 111036.27227205083} step=116000
2025-12-06 10:52.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_116000.d3


Epoch 117/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.60it/s, critic_loss=1.07e+14, actor_loss=-0.000196]


2025-12-06 10:53.02 [info     ] CRR_20251206101312: epoch=117 step=117000 epoch=117 metrics={'time_sample_batch': 0.001992595911026001, 'time_algorithm_update': 0.013450487613677979, 'critic_loss': 107155064874860.55, 'actor_loss': -0.00019430543924681842, 'time_step': 0.01569254994392395, 'td_error': 92604254362009.75, 'value_scale': 46857539.250209555, 'discounted_advantage': -59155952.554704, 'initial_state': 59338384.0, 'diff_eval': 111039.87596121417} step=117000
2025-12-06 10:53.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_117000.d3


Epoch 118/200: 100%|██████████| 1000/1000 [00:15<00:00, 64.02it/s, critic_loss=1.2e+14, actor_loss=-0.000193]


2025-12-06 10:53.22 [info     ] CRR_20251206101312: epoch=118 step=118000 epoch=118 metrics={'time_sample_batch': 0.00193243408203125, 'time_algorithm_update': 0.012981230020523071, 'critic_loss': 120340296618737.66, 'actor_loss': -0.00019081062078475953, 'time_step': 0.01514077615737915, 'td_error': 104061039804069.28, 'value_scale': 49629024.79966471, 'discounted_advantage': -62707065.37869057, 'initial_state': 62889296.0, 'diff_eval': 111042.15328572762} step=118000
2025-12-06 10:53.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_118000.d3


Epoch 119/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.20it/s, critic_loss=1.34e+14, actor_loss=-0.000385]


2025-12-06 10:53.43 [info     ] CRR_20251206101312: epoch=119 step=119000 epoch=119 metrics={'time_sample_batch': 0.002031073331832886, 'time_algorithm_update': 0.014013986587524414, 'critic_loss': 133787545614942.2, 'actor_loss': -0.00038162124156951907, 'time_step': 0.016316027164459228, 'td_error': 116815718449351.6, 'value_scale': 52550266.16806371, 'discounted_advantage': -66132053.81914685, 'initial_state': 66623744.0, 'diff_eval': 111040.50716254977} step=119000
2025-12-06 10:53.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_119000.d3


Epoch 120/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.52it/s, critic_loss=1.52e+14, actor_loss=-0.00077]


2025-12-06 10:54.04 [info     ] CRR_20251206101312: epoch=120 step=120000 epoch=120 metrics={'time_sample_batch': 0.0020071239471435545, 'time_algorithm_update': 0.01343731951713562, 'critic_loss': 152430225139957.75, 'actor_loss': -0.0007632424831390381, 'time_step': 0.015688711881637573, 'td_error': 130662112125435.1, 'value_scale': 55489949.751886, 'discounted_advantage': -71087151.40415706, 'initial_state': 70414104.0, 'diff_eval': 111038.44270287616} step=120000
2025-12-06 10:54.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_120000.d3


Epoch 121/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.82it/s, critic_loss=1.69e+14, actor_loss=-0.00443]


2025-12-06 10:54.25 [info     ] CRR_20251206101312: epoch=121 step=121000 epoch=121 metrics={'time_sample_batch': 0.002001662015914917, 'time_algorithm_update': 0.013339287281036376, 'critic_loss': 169240715832852.47, 'actor_loss': -0.004388644278049469, 'time_step': 0.015608129262924195, 'td_error': 145814924334384.75, 'value_scale': 58541102.11651299, 'discounted_advantage': -75478528.93913545, 'initial_state': 74333040.0, 'diff_eval': 111038.01298672595} step=121000
2025-12-06 10:54.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_121000.d3


Epoch 122/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.53it/s, critic_loss=1.89e+14, actor_loss=-0.00443]


2025-12-06 10:54.45 [info     ] CRR_20251206101312: epoch=122 step=122000 epoch=122 metrics={'time_sample_batch': 0.00200841760635376, 'time_algorithm_update': 0.01318567657470703, 'critic_loss': 189140460151767.03, 'actor_loss': -0.004388708287879126, 'time_step': 0.01544489598274231, 'td_error': 162519100168885.53, 'value_scale': 61849429.98575021, 'discounted_advantage': -77873335.67315163, 'initial_state': 78600776.0, 'diff_eval': 111038.22953106991} step=122000
2025-12-06 10:54.45 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_122000.d3


Epoch 123/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.82it/s, critic_loss=2.1e+14, actor_loss=-0.0077] 


2025-12-06 10:55.06 [info     ] CRR_20251206101312: epoch=123 step=123000 epoch=123 metrics={'time_sample_batch': 0.0019843788146972656, 'time_algorithm_update': 0.01340748906135559, 'critic_loss': 210182071803445.25, 'actor_loss': -0.007632424831390381, 'time_step': 0.015630044221878053, 'td_error': 180180994004779.84, 'value_scale': 65023100.790444255, 'discounted_advantage': -82966957.55099432, 'initial_state': 82641304.0, 'diff_eval': 111038.63312233143} step=123000
2025-12-06 10:55.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_123000.d3


Epoch 124/200: 100%|██████████| 1000/1000 [00:15<00:00, 64.82it/s, critic_loss=2.32e+14, actor_loss=-0.000578]


2025-12-06 10:55.26 [info     ] CRR_20251206101312: epoch=124 step=124000 epoch=124 metrics={'time_sample_batch': 0.0019958717823028564, 'time_algorithm_update': 0.012887187242507935, 'critic_loss': 232127884028805.12, 'actor_loss': -0.0005724318623542786, 'time_step': 0.01512287425994873, 'td_error': 200021099155668.75, 'value_scale': 68478748.3470243, 'discounted_advantage': -87314464.17567924, 'initial_state': 87091016.0, 'diff_eval': 111038.03599385904} step=124000
2025-12-06 10:55.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_124000.d3


Epoch 125/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.33it/s, critic_loss=2.6e+14, actor_loss=-0.0077] 


2025-12-06 10:55.46 [info     ] CRR_20251206101312: epoch=125 step=125000 epoch=125 metrics={'time_sample_batch': 0.0020183417797088625, 'time_algorithm_update': 0.01324184250831604, 'critic_loss': 260603462977323.0, 'actor_loss': -0.007632424831390381, 'time_step': 0.015496659755706787, 'td_error': 221341023590843.25, 'value_scale': 71960823.52137469, 'discounted_advantage': -92144030.21646307, 'initial_state': 91547576.0, 'diff_eval': 111035.61443135086} step=125000
2025-12-06 10:55.46 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_125000.d3


Epoch 126/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.07it/s, critic_loss=2.81e+14, actor_loss=-0.000578]


2025-12-06 10:56.07 [info     ] CRR_20251206101312: epoch=126 step=126000 epoch=126 metrics={'time_sample_batch': 0.00203841233253479, 'time_algorithm_update': 0.013266228914260865, 'critic_loss': 280807689359982.6, 'actor_loss': -0.0005724318623542786, 'time_step': 0.015547546863555908, 'td_error': 244923476430830.72, 'value_scale': 75714984.40989102, 'discounted_advantage': -95761947.12492268, 'initial_state': 96382944.0, 'diff_eval': 111038.2870950499} step=126000
2025-12-06 10:56.07 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_126000.d3


Epoch 127/200: 100%|██████████| 1000/1000 [00:15<00:00, 64.24it/s, critic_loss=3.09e+14, actor_loss=-0.00404]


2025-12-06 10:56.27 [info     ] CRR_20251206101312: epoch=127 step=127000 epoch=127 metrics={'time_sample_batch': 0.0019626989364624025, 'time_algorithm_update': 0.013083096742630005, 'critic_loss': 309193892027170.8, 'actor_loss': -0.0040070870463096075, 'time_step': 0.015288628578186036, 'td_error': 270134586317078.53, 'value_scale': 79517268.76948868, 'discounted_advantage': -99456691.23746632, 'initial_state': 101280912.0, 'diff_eval': 111039.51708493229} step=127000
2025-12-06 10:56.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_127000.d3


Epoch 128/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.98it/s, critic_loss=3.46e+14, actor_loss=-0.000578]


2025-12-06 10:56.47 [info     ] CRR_20251206101312: epoch=128 step=128000 epoch=128 metrics={'time_sample_batch': 0.0019958481788635253, 'time_algorithm_update': 0.013067049026489258, 'critic_loss': 345988071998619.6, 'actor_loss': -0.0005724958721839358, 'time_step': 0.01531512427330017, 'td_error': 297917233459594.44, 'value_scale': 83414633.89186923, 'discounted_advantage': -105274963.09248354, 'initial_state': 106325760.0, 'diff_eval': 111037.93273731369} step=128000
2025-12-06 10:56.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_128000.d3


Epoch 129/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.72it/s, critic_loss=3.81e+14, actor_loss=-0.000578]


2025-12-06 10:57.08 [info     ] CRR_20251206101312: epoch=129 step=129000 epoch=129 metrics={'time_sample_batch': 0.0022101714611053465, 'time_algorithm_update': 0.013627839088439942, 'critic_loss': 382111209417080.8, 'actor_loss': -0.0005724318623542786, 'time_step': 0.016129149198532103, 'td_error': 327032829752806.75, 'value_scale': 87278739.8466052, 'discounted_advantage': -111587682.24569263, 'initial_state': 111276072.0, 'diff_eval': 111041.11469206765} step=129000
2025-12-06 10:57.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_129000.d3


Epoch 130/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.81it/s, critic_loss=4.14e+14, actor_loss=-0.00809]


2025-12-06 10:57.29 [info     ] CRR_20251206101312: epoch=130 step=130000 epoch=130 metrics={'time_sample_batch': 0.002028731107711792, 'time_algorithm_update': 0.013338295936584473, 'critic_loss': 414678503884062.75, 'actor_loss': -0.0080140460729599, 'time_step': 0.0156277596950531, 'td_error': 359392826576194.44, 'value_scale': 91512344.19530596, 'discounted_advantage': -115780224.62174161, 'initial_state': 116717760.0, 'diff_eval': 111040.50448723511} step=130000
2025-12-06 10:57.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_130000.d3


Epoch 131/200: 100%|██████████| 1000/1000 [00:16<00:00, 62.17it/s, critic_loss=4.6e+14, actor_loss=-0.000578]


2025-12-06 10:57.49 [info     ] CRR_20251206101312: epoch=131 step=131000 epoch=131 metrics={'time_sample_batch': 0.0020527892112731936, 'time_algorithm_update': 0.0134443199634552, 'critic_loss': 459449895812595.7, 'actor_loss': -0.0005724958721839358, 'time_step': 0.01577185297012329, 'td_error': 394543208137309.4, 'value_scale': 95801492.04526404, 'discounted_advantage': -121924584.67407855, 'initial_state': 122270432.0, 'diff_eval': 111038.59600906819} step=131000
2025-12-06 10:57.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_131000.d3


Epoch 132/200: 100%|██████████| 1000/1000 [00:16<00:00, 62.02it/s, critic_loss=4.94e+14, actor_loss=-0.000963]


2025-12-06 10:58.10 [info     ] CRR_20251206101312: epoch=132 step=132000 epoch=132 metrics={'time_sample_batch': 0.002085219383239746, 'time_algorithm_update': 0.013459304094314575, 'critic_loss': 494154901115371.5, 'actor_loss': -0.0009540531039237976, 'time_step': 0.015804467916488648, 'td_error': 432412922642732.5, 'value_scale': 100291744.30259849, 'discounted_advantage': -126739604.29876663, 'initial_state': 128079552.0, 'diff_eval': 111036.35460740223} step=132000
2025-12-06 10:58.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_132000.d3


Epoch 133/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.60it/s, critic_loss=5.52e+14, actor_loss=-0.00809]


2025-12-06 10:58.31 [info     ] CRR_20251206101312: epoch=133 step=133000 epoch=133 metrics={'time_sample_batch': 0.0021179964542388918, 'time_algorithm_update': 0.013818392753601074, 'critic_loss': 552040623696248.8, 'actor_loss': -0.0080140460729599, 'time_step': 0.016185704708099366, 'td_error': 472525936497044.4, 'value_scale': 104667794.89689857, 'discounted_advantage': -134469058.0708536, 'initial_state': 133689896.0, 'diff_eval': 111038.26439856684} step=133000
2025-12-06 10:58.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_133000.d3


Epoch 134/200: 100%|██████████| 1000/1000 [00:15<00:00, 64.57it/s, critic_loss=6.06e+14, actor_loss=-0.000385]


2025-12-06 10:58.51 [info     ] CRR_20251206101312: epoch=134 step=134000 epoch=134 metrics={'time_sample_batch': 0.001932234525680542, 'time_algorithm_update': 0.013046977281570435, 'critic_loss': 606244236118982.6, 'actor_loss': -0.00038162124156951907, 'time_step': 0.015209564924240112, 'td_error': 516534906222726.1, 'value_scale': 109430448.8398994, 'discounted_advantage': -139682647.01102138, 'initial_state': 139847104.0, 'diff_eval': 111037.04386018048} step=134000
2025-12-06 10:58.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_134000.d3


Epoch 135/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.06it/s, critic_loss=6.64e+14, actor_loss=-0.000385]


2025-12-06 10:59.12 [info     ] CRR_20251206101312: epoch=135 step=135000 epoch=135 metrics={'time_sample_batch': 0.002031852722167969, 'time_algorithm_update': 0.013200750350952148, 'critic_loss': 664648711476871.1, 'actor_loss': -0.00038162124156951907, 'time_step': 0.01551827049255371, 'td_error': 563702231420764.8, 'value_scale': 114261606.66890194, 'discounted_advantage': -145853238.8580679, 'initial_state': 146119584.0, 'diff_eval': 111037.10415800339} step=135000
2025-12-06 10:59.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_135000.d3


Epoch 136/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.19it/s, critic_loss=7.19e+14, actor_loss=-0.00424]


2025-12-06 10:59.32 [info     ] CRR_20251206101312: epoch=136 step=136000 epoch=136 metrics={'time_sample_batch': 0.002035543441772461, 'time_algorithm_update': 0.013222383975982665, 'critic_loss': 719753040231399.4, 'actor_loss': -0.0041978336787376145, 'time_step': 0.0155073664188385, 'td_error': 614481021748026.8, 'value_scale': 119210983.81642917, 'discounted_advantage': -152884820.62506205, 'initial_state': 152528224.0, 'diff_eval': 111036.83368661757} step=136000
2025-12-06 10:59.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_136000.d3


Epoch 137/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.44it/s, critic_loss=7.65e+14, actor_loss=-0.00789]


2025-12-06 10:59.53 [info     ] CRR_20251206101312: epoch=137 step=137000 epoch=137 metrics={'time_sample_batch': 0.0020295727252960206, 'time_algorithm_update': 0.0132043297290802, 'critic_loss': 764261979308163.1, 'actor_loss': -0.00782323545217514, 'time_step': 0.015469869136810303, 'td_error': 670746684343670.1, 'value_scale': 124629674.68482816, 'discounted_advantage': -156965963.49443585, 'initial_state': 159573680.0, 'diff_eval': 111038.51608708678} step=137000
2025-12-06 10:59.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_137000.d3


Epoch 138/200: 100%|██████████| 1000/1000 [00:16<00:00, 62.03it/s, critic_loss=8.58e+14, actor_loss=-0.000385]


2025-12-06 11:00.13 [info     ] CRR_20251206101312: epoch=138 step=138000 epoch=138 metrics={'time_sample_batch': 0.00210313606262207, 'time_algorithm_update': 0.013422545433044433, 'critic_loss': 858730343228768.2, 'actor_loss': -0.0003816212630424243, 'time_step': 0.01579391098022461, 'td_error': 728733791675148.5, 'value_scale': 129682736.52640402, 'discounted_advantage': -166214768.16579187, 'initial_state': 166057328.0, 'diff_eval': 111038.93087259502} step=138000
2025-12-06 11:00.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_138000.d3


Epoch 139/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.18it/s, critic_loss=9.3e+14, actor_loss=-0.000385]


2025-12-06 11:00.34 [info     ] CRR_20251206101312: epoch=139 step=139000 epoch=139 metrics={'time_sample_batch': 0.001995763301849365, 'time_algorithm_update': 0.013309739351272583, 'critic_loss': 930849992967454.8, 'actor_loss': -0.00038162124156951907, 'time_step': 0.015537049770355225, 'td_error': 791074899362287.0, 'value_scale': 135097116.8549874, 'discounted_advantage': -172891171.50048196, 'initial_state': 173055056.0, 'diff_eval': 111037.11592054726} step=139000
2025-12-06 11:00.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_139000.d3


Epoch 140/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.49it/s, critic_loss=1e+15, actor_loss=-0.00385]  


2025-12-06 11:00.54 [info     ] CRR_20251206101312: epoch=140 step=140000 epoch=140 metrics={'time_sample_batch': 0.001918229341506958, 'time_algorithm_update': 0.013760152816772462, 'critic_loss': 1001189155188768.8, 'actor_loss': -0.003816212437168096, 'time_step': 0.015947019100189207, 'td_error': 860233518932561.8, 'value_scale': 140840799.68482816, 'discounted_advantage': -179392905.39994448, 'initial_state': 180483888.0, 'diff_eval': 111041.04298941907} step=140000
2025-12-06 11:00.55 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_140000.d3


Epoch 141/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.93it/s, critic_loss=1.08e+15, actor_loss=-0.00404]


2025-12-06 11:01.15 [info     ] CRR_20251206101312: epoch=141 step=141000 epoch=141 metrics={'time_sample_batch': 0.0020387065410614013, 'time_algorithm_update': 0.013290680408477784, 'critic_loss': 1085341826023424.0, 'actor_loss': -0.004007023036479953, 'time_step': 0.015580082893371582, 'td_error': 934253796087429.2, 'value_scale': 146805034.98072088, 'discounted_advantage': -185477338.47819293, 'initial_state': 188272512.0, 'diff_eval': 111039.13489618804} step=141000
2025-12-06 11:01.15 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_141000.d3


Epoch 142/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.94it/s, critic_loss=1.19e+15, actor_loss=-0.000385]


2025-12-06 11:01.35 [info     ] CRR_20251206101312: epoch=142 step=142000 epoch=142 metrics={'time_sample_batch': 0.002017181396484375, 'time_algorithm_update': 0.013302023887634278, 'critic_loss': 1186542440551547.0, 'actor_loss': -0.0003816212845153295, 'time_step': 0.01558035945892334, 'td_error': 1012654932565762.4, 'value_scale': 152705040.73093042, 'discounted_advantage': -194427741.38247663, 'initial_state': 195920080.0, 'diff_eval': 111040.81715975024} step=142000
2025-12-06 11:01.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_142000.d3


Epoch 143/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.93it/s, critic_loss=1.29e+15, actor_loss=-0.00385]


2025-12-06 11:01.56 [info     ] CRR_20251206101312: epoch=143 step=143000 epoch=143 metrics={'time_sample_batch': 0.0019967260360717774, 'time_algorithm_update': 0.013315361499786377, 'critic_loss': 1285493367819468.8, 'actor_loss': -0.0038162124156951903, 'time_step': 0.015569313287734985, 'td_error': 1094939027051840.1, 'value_scale': 158758859.33445096, 'discounted_advantage': -201706200.04309088, 'initial_state': 203797456.0, 'diff_eval': 111040.21154631673} step=143000
2025-12-06 11:01.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_143000.d3


Epoch 144/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.96it/s, critic_loss=1.38e+15, actor_loss=-0.00424]


2025-12-06 11:02.16 [info     ] CRR_20251206101312: epoch=144 step=144000 epoch=144 metrics={'time_sample_batch': 0.001980976581573486, 'time_algorithm_update': 0.0130835223197937, 'critic_loss': 1378488097859174.5, 'actor_loss': -0.0041978336572647094, 'time_step': 0.015315608501434326, 'td_error': 1181953500943266.0, 'value_scale': 164915101.88600168, 'discounted_advantage': -209245204.2671815, 'initial_state': 211817120.0, 'diff_eval': 111039.03629141182} step=144000
2025-12-06 11:02.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_144000.d3


Epoch 145/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.78it/s, critic_loss=1.5e+15, actor_loss=-0.00462]


2025-12-06 11:02.37 [info     ] CRR_20251206101312: epoch=145 step=145000 epoch=145 metrics={'time_sample_batch': 0.0020850017070770262, 'time_algorithm_update': 0.013537468194961547, 'critic_loss': 1507334936271519.8, 'actor_loss': -0.004579454920307134, 'time_step': 0.015891893863677977, 'td_error': 1275921230459542.8, 'value_scale': 171174436.11567476, 'discounted_advantage': -219263814.4121919, 'initial_state': 219941728.0, 'diff_eval': 111038.1354972283} step=145000
2025-12-06 11:02.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_145000.d3


Epoch 146/200: 100%|██████████| 1000/1000 [00:16<00:00, 62.16it/s, critic_loss=1.62e+15, actor_loss=-0.00404]


2025-12-06 11:02.58 [info     ] CRR_20251206101312: epoch=146 step=146000 epoch=146 metrics={'time_sample_batch': 0.0019898681640625, 'time_algorithm_update': 0.013550299644470215, 'critic_loss': 1615729851942043.8, 'actor_loss': -0.004007023057952855, 'time_step': 0.015794933557510375, 'td_error': 1380336075329261.8, 'value_scale': 178049811.76026824, 'discounted_advantage': -226553817.25405207, 'initial_state': 228862688.0, 'diff_eval': 111035.67367013961} step=146000
2025-12-06 11:02.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_146000.d3


Epoch 147/200: 100%|██████████| 1000/1000 [00:16<00:00, 62.04it/s, critic_loss=1.76e+15, actor_loss=-0.00385]


2025-12-06 11:03.18 [info     ] CRR_20251206101312: epoch=147 step=147000 epoch=147 metrics={'time_sample_batch': 0.0020768258571624756, 'time_algorithm_update': 0.013495773315429687, 'critic_loss': 1758533717313192.0, 'actor_loss': -0.007632424831390381, 'time_step': 0.015820305824279787, 'td_error': 1492024163612849.2, 'value_scale': 185039815.20871753, 'discounted_advantage': -235450593.23965737, 'initial_state': 237988864.0, 'diff_eval': 111037.62477141235} step=147000
2025-12-06 11:03.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_147000.d3


Epoch 148/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.07it/s, critic_loss=1.86e+15, actor_loss=-0.0121] 


2025-12-06 11:03.39 [info     ] CRR_20251206101312: epoch=148 step=148000 epoch=148 metrics={'time_sample_batch': 0.0020312526226043703, 'time_algorithm_update': 0.013248151063919068, 'critic_loss': 1864629448372912.2, 'actor_loss': -0.01202106910943985, 'time_step': 0.015540194034576417, 'td_error': 1610849605460797.5, 'value_scale': 192326657.2053646, 'discounted_advantage': -242334328.1706291, 'initial_state': 247415280.0, 'diff_eval': 111038.1843077539} step=148000
2025-12-06 11:03.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_148000.d3


Epoch 149/200: 100%|██████████| 1000/1000 [00:16<00:00, 62.07it/s, critic_loss=2.02e+15, actor_loss=-0.000385]


2025-12-06 11:04.00 [info     ] CRR_20251206101312: epoch=149 step=149000 epoch=149 metrics={'time_sample_batch': 0.0019514100551605224, 'time_algorithm_update': 0.01362071180343628, 'critic_loss': 2023266791278510.0, 'actor_loss': -0.0003816212630424243, 'time_step': 0.015817987442016603, 'td_error': 1733911439005854.8, 'value_scale': 199383914.50796312, 'discounted_advantage': -253219277.88428685, 'initial_state': 256565952.0, 'diff_eval': 111036.56537300104} step=149000
2025-12-06 11:04.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_149000.d3


Epoch 150/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.99it/s, critic_loss=2.16e+15, actor_loss=-0.000193]


2025-12-06 11:04.20 [info     ] CRR_20251206101312: epoch=150 step=150000 epoch=150 metrics={'time_sample_batch': 0.0019977633953094484, 'time_algorithm_update': 0.013315128326416016, 'critic_loss': 2157316295811399.8, 'actor_loss': -0.00019081062078475953, 'time_step': 0.015567336797714233, 'td_error': 1869020238291058.5, 'value_scale': 207027318.56496227, 'discounted_advantage': -261139307.10007966, 'initial_state': 266520480.0, 'diff_eval': 111036.04691695208} step=150000
2025-12-06 11:04.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_150000.d3


Epoch 151/200: 100%|██████████| 1000/1000 [00:16<00:00, 62.28it/s, critic_loss=2.34e+15, actor_loss=-0.000963]


2025-12-06 11:04.41 [info     ] CRR_20251206101312: epoch=151 step=151000 epoch=151 metrics={'time_sample_batch': 0.0020773303508758545, 'time_algorithm_update': 0.013412562847137452, 'critic_loss': 2341869446416564.0, 'actor_loss': -0.00477026551961899, 'time_step': 0.015753188133239747, 'td_error': 2008906155867589.8, 'value_scale': 214618537.6177703, 'discounted_advantage': -269948307.6086803, 'initial_state': 276372320.0, 'diff_eval': 111036.78742636899} step=151000
2025-12-06 11:04.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_151000.d3


Epoch 152/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.89it/s, critic_loss=2.56e+15, actor_loss=-0.00116]


2025-12-06 11:05.02 [info     ] CRR_20251206101312: epoch=152 step=152000 epoch=152 metrics={'time_sample_batch': 0.0020099053382873536, 'time_algorithm_update': 0.013345908403396606, 'critic_loss': 2558853280893501.5, 'actor_loss': -0.0011448637247085595, 'time_step': 0.015593794822692872, 'td_error': 2154595017022176.5, 'value_scale': 222021125.17015925, 'discounted_advantage': -283070360.958718, 'initial_state': 285961024.0, 'diff_eval': 111036.90382557923} step=152000
2025-12-06 11:05.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_152000.d3


Epoch 153/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.36it/s, critic_loss=2.68e+15, actor_loss=-0.000193]


2025-12-06 11:05.22 [info     ] CRR_20251206101312: epoch=153 step=153000 epoch=153 metrics={'time_sample_batch': 0.001994206190109253, 'time_algorithm_update': 0.01327971339225769, 'critic_loss': 2678613264748773.5, 'actor_loss': -0.00019081062078475953, 'time_step': 0.015500863552093506, 'td_error': 2318598744423133.5, 'value_scale': 230396114.91366303, 'discounted_advantage': -290669611.6061275, 'initial_state': 296883744.0, 'diff_eval': 111040.01214584261} step=153000
2025-12-06 11:05.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_153000.d3


Epoch 154/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.85it/s, critic_loss=2.94e+15, actor_loss=-0.000193]


2025-12-06 11:05.42 [info     ] CRR_20251206101312: epoch=154 step=154000 epoch=154 metrics={'time_sample_batch': 0.0019813611507415773, 'time_algorithm_update': 0.01315306043624878, 'critic_loss': 2942785244220096.5, 'actor_loss': -0.00019081062078475953, 'time_step': 0.015376569032669067, 'td_error': 2488883464305820.0, 'value_scale': 238463260.04526404, 'discounted_advantage': -303804483.55166894, 'initial_state': 307314944.0, 'diff_eval': 111038.12823563069} step=154000
2025-12-06 11:05.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_154000.d3


Epoch 155/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.27it/s, critic_loss=3.13e+15, actor_loss=-0.000578]


2025-12-06 11:06.03 [info     ] CRR_20251206101312: epoch=155 step=155000 epoch=155 metrics={'time_sample_batch': 0.0020000486373901367, 'time_algorithm_update': 0.013209441184997558, 'critic_loss': 3128601850454999.0, 'actor_loss': -0.000572431862354281, 'time_step': 0.015482027769088745, 'td_error': 2671648614216002.5, 'value_scale': 247128824.4191115, 'discounted_advantage': -311655487.47464395, 'initial_state': 318646464.0, 'diff_eval': 111034.9327170539} step=155000
2025-12-06 11:06.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_155000.d3


Epoch 156/200: 100%|██████████| 1000/1000 [00:15<00:00, 64.20it/s, critic_loss=3.39e+15, actor_loss=-0.00404]


2025-12-06 11:06.23 [info     ] CRR_20251206101312: epoch=156 step=156000 epoch=156 metrics={'time_sample_batch': 0.001984581232070923, 'time_algorithm_update': 0.013041779041290284, 'critic_loss': 3391090872761188.5, 'actor_loss': -0.004007023036479953, 'time_step': 0.01528956961631775, 'td_error': 2863595639781926.0, 'value_scale': 255628572.83989942, 'discounted_advantage': -325685147.35123223, 'initial_state': 329790560.0, 'diff_eval': 111038.61964179884} step=156000
2025-12-06 11:06.23 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_156000.d3


Epoch 157/200: 100%|██████████| 1000/1000 [00:15<00:00, 65.68it/s, critic_loss=3.58e+15, actor_loss=-0.000385]


2025-12-06 11:06.42 [info     ] CRR_20251206101312: epoch=157 step=157000 epoch=157 metrics={'time_sample_batch': 0.0019112310409545898, 'time_algorithm_update': 0.012775047063827514, 'critic_loss': 3587918472962638.0, 'actor_loss': -0.0003816212415695239, 'time_step': 0.014931852102279663, 'td_error': 3067081338714491.0, 'value_scale': 264558742.81642917, 'discounted_advantage': -335535302.05661327, 'initial_state': 341507616.0, 'diff_eval': 111037.67102204265} step=157000
2025-12-06 11:06.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_157000.d3


Epoch 158/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.72it/s, critic_loss=3.84e+15, actor_loss=-0.00424]


2025-12-06 11:07.03 [info     ] CRR_20251206101312: epoch=158 step=158000 epoch=158 metrics={'time_sample_batch': 0.0020612668991088867, 'time_algorithm_update': 0.013840867042541503, 'critic_loss': 3842925526746398.5, 'actor_loss': -0.0041978336572647094, 'time_step': 0.01615924334526062, 'td_error': 3275298279552983.5, 'value_scale': 273232126.2011735, 'discounted_advantage': -348777134.10554004, 'initial_state': 352815520.0, 'diff_eval': 111035.32304347791} step=158000
2025-12-06 11:07.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_158000.d3


Epoch 159/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.45it/s, critic_loss=4.12e+15, actor_loss=-0.00828]


2025-12-06 11:07.24 [info     ] CRR_20251206101312: epoch=159 step=159000 epoch=159 metrics={'time_sample_batch': 0.002022603750228882, 'time_algorithm_update': 0.013210457086563111, 'critic_loss': 4126942126321697.0, 'actor_loss': -0.008204856693744659, 'time_step': 0.01547770118713379, 'td_error': 3498374275086618.0, 'value_scale': 282332308.79798824, 'discounted_advantage': -359628422.5230977, 'initial_state': 364751968.0, 'diff_eval': 111038.62857840986} step=159000
2025-12-06 11:07.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_159000.d3


Epoch 160/200: 100%|██████████| 1000/1000 [00:15<00:00, 64.78it/s, critic_loss=4.44e+15, actor_loss=-0.00404]


2025-12-06 11:07.44 [info     ] CRR_20251206101312: epoch=160 step=160000 epoch=160 metrics={'time_sample_batch': 0.0019394934177398681, 'time_algorithm_update': 0.01298058533668518, 'critic_loss': 4432402656060768.5, 'actor_loss': -0.00400702303647995, 'time_step': 0.015160100936889648, 'td_error': 3738342972720235.5, 'value_scale': 291785886.56496227, 'discounted_advantage': -371238238.8859612, 'initial_state': 377069472.0, 'diff_eval': 111038.99548513792} step=160000
2025-12-06 11:07.44 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_160000.d3


Epoch 161/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.76it/s, critic_loss=4.73e+15, actor_loss=-0.00154]


2025-12-06 11:08.04 [info     ] CRR_20251206101312: epoch=161 step=161000 epoch=161 metrics={'time_sample_batch': 0.0020713789463043214, 'time_algorithm_update': 0.013336453676223756, 'critic_loss': 4732611302608863.0, 'actor_loss': -0.0015264849662780763, 'time_step': 0.015646642923355103, 'td_error': 3990538877745020.5, 'value_scale': 301402656.64710814, 'discounted_advantage': -382999965.55410683, 'initial_state': 389689888.0, 'diff_eval': 111038.10742433074} step=161000
2025-12-06 11:08.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_161000.d3


Epoch 162/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.63it/s, critic_loss=5.05e+15, actor_loss=-0.00809]


2025-12-06 11:08.24 [info     ] CRR_20251206101312: epoch=162 step=162000 epoch=162 metrics={'time_sample_batch': 0.0019837071895599367, 'time_algorithm_update': 0.013211114168167113, 'critic_loss': 5056648039691715.0, 'actor_loss': -0.0080140460729599, 'time_step': 0.01543084478378296, 'td_error': 4252839560805015.5, 'value_scale': 310983873.1198659, 'discounted_advantage': -396696671.2971951, 'initial_state': 402158624.0, 'diff_eval': 111038.4736993219} step=162000
2025-12-06 11:08.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_162000.d3


Epoch 163/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.61it/s, critic_loss=5.33e+15, actor_loss=-0.000193]


2025-12-06 11:08.45 [info     ] CRR_20251206101312: epoch=163 step=163000 epoch=163 metrics={'time_sample_batch': 0.0020428946018218996, 'time_algorithm_update': 0.013391916751861572, 'critic_loss': 5321383025957667.0, 'actor_loss': -0.00019081062078476436, 'time_step': 0.01567590856552124, 'td_error': 4536710723645283.0, 'value_scale': 321172849.26068735, 'discounted_advantage': -409251745.1613317, 'initial_state': 415473344.0, 'diff_eval': 111038.70413144057} step=163000
2025-12-06 11:08.45 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_163000.d3


Epoch 164/200: 100%|██████████| 1000/1000 [00:15<00:00, 64.19it/s, critic_loss=5.67e+15, actor_loss=-0.0117] 


2025-12-06 11:09.05 [info     ] CRR_20251206101312: epoch=164 step=164000 epoch=164 metrics={'time_sample_batch': 0.0019337587356567383, 'time_algorithm_update': 0.013128369092941284, 'critic_loss': 5673855016272658.0, 'actor_loss': -0.01163944786787033, 'time_step': 0.015306024074554444, 'td_error': 4842992251311308.0, 'value_scale': 331824169.736798, 'discounted_advantage': -421794550.98186374, 'initial_state': 429405440.0, 'diff_eval': 111038.8184365259} step=164000
2025-12-06 11:09.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_164000.d3


Epoch 165/200: 100%|██████████| 1000/1000 [00:16<00:00, 62.35it/s, critic_loss=6.07e+15, actor_loss=-0.0052] 


2025-12-06 11:09.26 [info     ] CRR_20251206101312: epoch=165 step=165000 epoch=165 metrics={'time_sample_batch': 0.002109412908554077, 'time_algorithm_update': 0.013369563102722169, 'critic_loss': 6072495896048697.0, 'actor_loss': -0.005151886761188507, 'time_step': 0.015738280773162843, 'td_error': 5160430928254095.0, 'value_scale': 342424011.4367142, 'discounted_advantage': -436220991.09502625, 'initial_state': 443279168.0, 'diff_eval': 111038.75443239044} step=165000
2025-12-06 11:09.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_165000.d3


Epoch 166/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.74it/s, critic_loss=6.39e+15, actor_loss=-0.00828]


2025-12-06 11:09.47 [info     ] CRR_20251206101312: epoch=166 step=166000 epoch=166 metrics={'time_sample_batch': 0.0020820229053497316, 'time_algorithm_update': 0.013555680274963379, 'critic_loss': 6389812073659892.0, 'actor_loss': -0.008204856693744662, 'time_step': 0.01588653230667114, 'td_error': 5499266159792876.0, 'value_scale': 353569379.55071247, 'discounted_advantage': -446882022.892631, 'initial_state': 457885536.0, 'diff_eval': 111036.80012761617} step=166000
2025-12-06 11:09.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_166000.d3


Epoch 167/200: 100%|██████████| 1000/1000 [00:17<00:00, 56.39it/s, critic_loss=6.93e+15, actor_loss=-0.000578]


2025-12-06 11:10.09 [info     ] CRR_20251206101312: epoch=167 step=167000 epoch=167 metrics={'time_sample_batch': 0.0021754968166351318, 'time_algorithm_update': 0.014003840446472168, 'critic_loss': 6927422760759067.0, 'actor_loss': -0.000572431862354281, 'time_step': 0.017432413339614868, 'td_error': 5842383614822482.0, 'value_scale': 364144014.8466052, 'discounted_advantage': -464643834.5588156, 'initial_state': 471688640.0, 'diff_eval': 111037.73961246104} step=167000
2025-12-06 11:10.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_167000.d3


Epoch 168/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.97it/s, critic_loss=7.26e+15, actor_loss=-0.00424]


2025-12-06 11:10.29 [info     ] CRR_20251206101312: epoch=168 step=168000 epoch=168 metrics={'time_sample_batch': 0.001976736545562744, 'time_algorithm_update': 0.013107178449630738, 'critic_loss': 7267109907294323.0, 'actor_loss': -0.0041978336572647094, 'time_step': 0.015333370447158814, 'td_error': 6219843961776392.0, 'value_scale': 375866062.0050293, 'discounted_advantage': -475386723.9406314, 'initial_state': 487051648.0, 'diff_eval': 111038.89825086441} step=168000
2025-12-06 11:10.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_168000.d3


Epoch 169/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.00it/s, critic_loss=7.86e+15, actor_loss=-0.00404]


2025-12-06 11:10.50 [info     ] CRR_20251206101312: epoch=169 step=169000 epoch=169 metrics={'time_sample_batch': 0.002023648500442505, 'time_algorithm_update': 0.013306622505187988, 'critic_loss': 7861385887051940.0, 'actor_loss': -0.00400702303647995, 'time_step': 0.015574236869812012, 'td_error': 6600966951541230.0, 'value_scale': 386913855.41659683, 'discounted_advantage': -494066441.2503743, 'initial_state': 501416416.0, 'diff_eval': 111035.22705323699} step=169000
2025-12-06 11:10.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_169000.d3


Epoch 170/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.14it/s, critic_loss=8.29e+15, actor_loss=-0.00424]


2025-12-06 11:11.10 [info     ] CRR_20251206101312: epoch=170 step=170000 epoch=170 metrics={'time_sample_batch': 0.002023631572723389, 'time_algorithm_update': 0.013287519216537475, 'critic_loss': 8291855097239962.0, 'actor_loss': -0.004197833657264712, 'time_step': 0.015550189733505249, 'td_error': 7016581629500352.0, 'value_scale': 398893040.42581725, 'discounted_advantage': -508215497.3810247, 'initial_state': 517143520.0, 'diff_eval': 111037.55258907756} step=170000
2025-12-06 11:11.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_170000.d3


Epoch 171/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.23it/s, critic_loss=8.75e+15, actor_loss=-0.00077]


2025-12-06 11:11.31 [info     ] CRR_20251206101312: epoch=171 step=171000 epoch=171 metrics={'time_sample_batch': 0.001987328767776489, 'time_algorithm_update': 0.013299798250198364, 'critic_loss': 8736391038421172.0, 'actor_loss': -0.0007632424831390381, 'time_step': 0.015532008171081543, 'td_error': 7451032147761688.0, 'value_scale': 411060913.022632, 'discounted_advantage': -522312231.3279991, 'initial_state': 533049248.0, 'diff_eval': 111037.45998324372} step=171000
2025-12-06 11:11.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_171000.d3


Epoch 172/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.28it/s, critic_loss=9.39e+15, actor_loss=-0.00116]


2025-12-06 11:11.52 [info     ] CRR_20251206101312: epoch=172 step=172000 epoch=172 metrics={'time_sample_batch': 0.0020647306442260743, 'time_algorithm_update': 0.013666683435440063, 'critic_loss': 9396754849804058.0, 'actor_loss': -0.0011448637247085571, 'time_step': 0.015992393255233765, 'td_error': 7892136935871101.0, 'value_scale': 422750006.71248955, 'discounted_advantage': -541256843.8073827, 'initial_state': 548343296.0, 'diff_eval': 111035.79143248539} step=172000
2025-12-06 11:11.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_172000.d3


Epoch 173/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.59it/s, critic_loss=9.84e+15, actor_loss=-0.000578]


2025-12-06 11:12.12 [info     ] CRR_20251206101312: epoch=173 step=173000 epoch=173 metrics={'time_sample_batch': 0.002002746105194092, 'time_algorithm_update': 0.013404157876968383, 'critic_loss': 9852284875934008.0, 'actor_loss': -0.0005724318623542786, 'time_step': 0.015669490337371826, 'td_error': 8380562075488156.0, 'value_scale': 435736809.13663036, 'discounted_advantage': -553419032.4480112, 'initial_state': 565431488.0, 'diff_eval': 111035.62025184343} step=173000
2025-12-06 11:12.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_173000.d3


Epoch 174/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.71it/s, critic_loss=1.05e+16, actor_loss=-0.00462]


2025-12-06 11:12.33 [info     ] CRR_20251206101312: epoch=174 step=174000 epoch=174 metrics={'time_sample_batch': 0.0019946210384368898, 'time_algorithm_update': 0.013113085746765136, 'critic_loss': 1.0479213043911754e+16, 'actor_loss': -0.0045794548988342286, 'time_step': 0.015380634546279907, 'td_error': 8883938247502794.0, 'value_scale': 448555314.79966474, 'discounted_advantage': -569336066.8789623, 'initial_state': 582314368.0, 'diff_eval': 111038.66152061342} step=174000
2025-12-06 11:12.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_174000.d3


Epoch 175/200: 100%|██████████| 1000/1000 [00:15<00:00, 64.12it/s, critic_loss=1.1e+16, actor_loss=-0.000193]


2025-12-06 11:12.53 [info     ] CRR_20251206101312: epoch=175 step=175000 epoch=175 metrics={'time_sample_batch': 0.0019633848667144774, 'time_algorithm_update': 0.013041581869125365, 'critic_loss': 1.105352568093855e+16, 'actor_loss': -0.00019081062078475953, 'time_step': 0.015271256923675538, 'td_error': 9421280420163182.0, 'value_scale': 461925412.21793795, 'discounted_advantage': -583248403.8609997, 'initial_state': 599751936.0, 'diff_eval': 111037.92415555584} step=175000
2025-12-06 11:12.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_175000.d3


Epoch 176/200: 100%|██████████| 1000/1000 [00:15<00:00, 65.12it/s, critic_loss=1.18e+16, actor_loss=-0.00116]


2025-12-06 11:13.13 [info     ] CRR_20251206101312: epoch=176 step=176000 epoch=176 metrics={'time_sample_batch': 0.0019033684730529784, 'time_algorithm_update': 0.012931644201278687, 'critic_loss': 1.175416860615639e+16, 'actor_loss': -0.0011448637247085571, 'time_step': 0.015085444688796997, 'td_error': 9965928481683404.0, 'value_scale': 474837056.0502934, 'discounted_advantage': -602906931.6841865, 'initial_state': 616707584.0, 'diff_eval': 111038.91867927494} step=176000
2025-12-06 11:13.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_176000.d3


Epoch 177/200: 100%|██████████| 1000/1000 [00:16<00:00, 62.27it/s, critic_loss=1.24e+16, actor_loss=-0.000578]


2025-12-06 11:13.33 [info     ] CRR_20251206101312: epoch=177 step=177000 epoch=177 metrics={'time_sample_batch': 0.0020289154052734377, 'time_algorithm_update': 0.013483335256576539, 'critic_loss': 1.2448137488009004e+16, 'actor_loss': -0.0005724318623542786, 'time_step': 0.01576122522354126, 'td_error': 1.0546423870854124e+16, 'value_scale': 488420284.22464377, 'discounted_advantage': -618071359.795002, 'initial_state': 634495872.0, 'diff_eval': 111036.92307094116} step=177000
2025-12-06 11:13.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_177000.d3


Epoch 178/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.90it/s, critic_loss=1.31e+16, actor_loss=-0.00424]


2025-12-06 11:13.54 [info     ] CRR_20251206101312: epoch=178 step=178000 epoch=178 metrics={'time_sample_batch': 0.001975882530212402, 'time_algorithm_update': 0.013130942821502685, 'critic_loss': 1.3119731194151306e+16, 'actor_loss': -0.0041978336572647094, 'time_step': 0.015350924968719483, 'td_error': 1.1153632736081092e+16, 'value_scale': 502237933.3042749, 'discounted_advantage': -634838471.4735689, 'initial_state': 652654592.0, 'diff_eval': 111036.8618162016} step=178000
2025-12-06 11:13.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_178000.d3


Epoch 179/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.40it/s, critic_loss=1.4e+16, actor_loss=-0.000385]


2025-12-06 11:14.14 [info     ] CRR_20251206101312: epoch=179 step=179000 epoch=179 metrics={'time_sample_batch': 0.0020210511684417725, 'time_algorithm_update': 0.013195268630981445, 'critic_loss': 1.3941045796469212e+16, 'actor_loss': -0.00038162124156951907, 'time_step': 0.01547480297088623, 'td_error': 1.1798587035939602e+16, 'value_scale': 516310651.9865884, 'discounted_advantage': -654616536.715842, 'initial_state': 671114432.0, 'diff_eval': 111037.69771440991} step=179000
2025-12-06 11:14.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_179000.d3


Epoch 180/200: 100%|██████████| 1000/1000 [00:15<00:00, 64.38it/s, critic_loss=1.48e+16, actor_loss=-0.00847]


2025-12-06 11:14.34 [info     ] CRR_20251206101312: epoch=180 step=180000 epoch=180 metrics={'time_sample_batch': 0.0019776973724365235, 'time_algorithm_update': 0.013007536172866822, 'critic_loss': 1.474129950454422e+16, 'actor_loss': -0.008395667314529419, 'time_step': 0.015236839532852172, 'td_error': 1.2467640483955834e+16, 'value_scale': 530770306.23637885, 'discounted_advantage': -670229329.5347757, 'initial_state': 690076032.0, 'diff_eval': 111036.61558165634} step=180000
2025-12-06 11:14.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_180000.d3


Epoch 181/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.91it/s, critic_loss=1.57e+16, actor_loss=-0.00443]


2025-12-06 11:14.54 [info     ] CRR_20251206101312: epoch=181 step=181000 epoch=181 metrics={'time_sample_batch': 0.0019780871868133546, 'time_algorithm_update': 0.013133611440658569, 'critic_loss': 1.5675602272958546e+16, 'actor_loss': -0.004388644278049469, 'time_step': 0.01535628318786621, 'td_error': 1.3141923993382446e+16, 'value_scale': 544571590.9371333, 'discounted_advantage': -692986277.3665757, 'initial_state': 708155008.0, 'diff_eval': 111038.36432493583} step=181000
2025-12-06 11:14.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_181000.d3


Epoch 182/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.29it/s, critic_loss=1.62e+16, actor_loss=-0.00424]


2025-12-06 11:15.15 [info     ] CRR_20251206101312: epoch=182 step=182000 epoch=182 metrics={'time_sample_batch': 0.0020046775341033937, 'time_algorithm_update': 0.01325701642036438, 'critic_loss': 1.6237109474387558e+16, 'actor_loss': -0.0041978336572647094, 'time_step': 0.015495569944381714, 'td_error': 1.3864269754548046e+16, 'value_scale': 559366476.1978207, 'discounted_advantage': -709313436.3580886, 'initial_state': 727587200.0, 'diff_eval': 111036.8842220898} step=182000
2025-12-06 11:15.15 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_182000.d3


Epoch 183/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.54it/s, critic_loss=1.75e+16, actor_loss=-0.00077]


2025-12-06 11:15.35 [info     ] CRR_20251206101312: epoch=183 step=183000 epoch=183 metrics={'time_sample_batch': 0.0020551924705505373, 'time_algorithm_update': 0.01339358925819397, 'critic_loss': 1.7533076530698126e+16, 'actor_loss': -0.0007632424831390381, 'time_step': 0.01569528603553772, 'td_error': 1.4609745626876048e+16, 'value_scale': 573891558.8935456, 'discounted_advantage': -731650034.2729666, 'initial_state': 746716352.0, 'diff_eval': 111036.93289673158} step=183000
2025-12-06 11:15.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_183000.d3


Epoch 184/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.40it/s, critic_loss=1.85e+16, actor_loss=-0.00866]


2025-12-06 11:15.56 [info     ] CRR_20251206101312: epoch=184 step=184000 epoch=184 metrics={'time_sample_batch': 0.0019783816337585448, 'time_algorithm_update': 0.013224704265594482, 'critic_loss': 1.849802348623718e+16, 'actor_loss': -0.008586477935314179, 'time_step': 0.015470517158508302, 'td_error': 1.5414333569753364e+16, 'value_scale': 589225324.9388098, 'discounted_advantage': -753008136.442758, 'initial_state': 766777344.0, 'diff_eval': 111038.12737428292} step=184000
2025-12-06 11:15.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_184000.d3


Epoch 185/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.68it/s, critic_loss=1.93e+16, actor_loss=-0.000578]


2025-12-06 11:16.16 [info     ] CRR_20251206101312: epoch=185 step=185000 epoch=185 metrics={'time_sample_batch': 0.0020173256397247315, 'time_algorithm_update': 0.013407893657684326, 'critic_loss': 1.934541584631805e+16, 'actor_loss': -0.0005724318623542786, 'time_step': 0.015663275003433227, 'td_error': 1.6271087518958192e+16, 'value_scale': 605496376.9991617, 'discounted_advantage': -770031671.3030473, 'initial_state': 788324736.0, 'diff_eval': 111035.81912863285} step=185000
2025-12-06 11:16.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_185000.d3


Epoch 186/200: 100%|██████████| 1000/1000 [00:16<00:00, 62.32it/s, critic_loss=2.02e+16, actor_loss=-0.00116]


2025-12-06 11:16.37 [info     ] CRR_20251206101312: epoch=186 step=186000 epoch=186 metrics={'time_sample_batch': 0.0020498759746551513, 'time_algorithm_update': 0.013441354513168335, 'critic_loss': 2.013889862300808e+16, 'actor_loss': -0.0011448637247085571, 'time_step': 0.015742407083511354, 'td_error': 1.7151700261549196e+16, 'value_scale': 621747885.5356245, 'discounted_advantage': -787440989.8971161, 'initial_state': 809686976.0, 'diff_eval': 111037.0258680601} step=186000
2025-12-06 11:16.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_186000.d3


Epoch 187/200: 100%|██████████| 1000/1000 [00:16<00:00, 62.21it/s, critic_loss=2.15e+16, actor_loss=-0.00809]


2025-12-06 11:16.57 [info     ] CRR_20251206101312: epoch=187 step=187000 epoch=187 metrics={'time_sample_batch': 0.002062784194946289, 'time_algorithm_update': 0.013447738409042358, 'critic_loss': 2.1463920955371816e+16, 'actor_loss': -0.0080140460729599, 'time_step': 0.015759241104125975, 'td_error': 1.8069642666018856e+16, 'value_scale': 637989224.7074602, 'discounted_advantage': -809483134.0678537, 'initial_state': 830925312.0, 'diff_eval': 111041.84655014767} step=187000
2025-12-06 11:16.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_187000.d3


Epoch 188/200: 100%|██████████| 1000/1000 [00:16<00:00, 62.13it/s, critic_loss=2.28e+16, actor_loss=-0.00424]


2025-12-06 11:17.18 [info     ] CRR_20251206101312: epoch=188 step=188000 epoch=188 metrics={'time_sample_batch': 0.0020647821426391603, 'time_algorithm_update': 0.013468646287918091, 'critic_loss': 2.2821742501813028e+16, 'actor_loss': -0.0041978336572647094, 'time_step': 0.015782030820846556, 'td_error': 1.9009688590124444e+16, 'value_scale': 654130630.9807209, 'discounted_advantage': -833261996.480662, 'initial_state': 852147008.0, 'diff_eval': 111040.8762608134} step=188000
2025-12-06 11:17.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_188000.d3


Epoch 189/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.59it/s, critic_loss=2.38e+16, actor_loss=-0.00789]


2025-12-06 11:17.38 [info     ] CRR_20251206101312: epoch=189 step=189000 epoch=189 metrics={'time_sample_batch': 0.002052366018295288, 'time_algorithm_update': 0.013368231773376465, 'critic_loss': 2.3862064939395972e+16, 'actor_loss': -0.0080140460729599, 'time_step': 0.015666298866271973, 'td_error': 2.0007919649043816e+16, 'value_scale': 671000728.1173512, 'discounted_advantage': -854269010.7500767, 'initial_state': 874212416.0, 'diff_eval': 111035.63150113946} step=189000
2025-12-06 11:17.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_189000.d3


Epoch 190/200: 100%|██████████| 1000/1000 [00:16<00:00, 62.42it/s, critic_loss=2.5e+16, actor_loss=-0.000385]


2025-12-06 11:17.59 [info     ] CRR_20251206101312: epoch=190 step=190000 epoch=190 metrics={'time_sample_batch': 0.002027568817138672, 'time_algorithm_update': 0.013431979179382323, 'critic_loss': 2.502452339579814e+16, 'actor_loss': -0.00038162124156951907, 'time_step': 0.015717262983322145, 'td_error': 2.106660945060666e+16, 'value_scale': 688450994.48114, 'discounted_advantage': -876281044.316621, 'initial_state': 897200128.0, 'diff_eval': 111036.99748553574} step=190000
2025-12-06 11:17.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_190000.d3


Epoch 191/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.92it/s, critic_loss=2.62e+16, actor_loss=-0.00847]


2025-12-06 11:18.20 [info     ] CRR_20251206101312: epoch=191 step=191000 epoch=191 metrics={'time_sample_batch': 0.0020115909576416016, 'time_algorithm_update': 0.013346739053726197, 'critic_loss': 2.625079448503006e+16, 'actor_loss': -0.008395667314529419, 'time_step': 0.015594627618789672, 'td_error': 2.2161592008340104e+16, 'value_scale': 706111439.658005, 'discounted_advantage': -896182399.1733048, 'initial_state': 920395520.0, 'diff_eval': 111039.09172565879} step=191000
2025-12-06 11:18.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_191000.d3


Epoch 192/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.93it/s, critic_loss=2.77e+16, actor_loss=-0.000578]


2025-12-06 11:18.40 [info     ] CRR_20251206101312: epoch=192 step=192000 epoch=192 metrics={'time_sample_batch': 0.0020277910232543946, 'time_algorithm_update': 0.013351571559906007, 'critic_loss': 2.769913322959117e+16, 'actor_loss': -0.0005724318623542786, 'time_step': 0.015614817142486572, 'td_error': 2.32768501613201e+16, 'value_scale': 723443810.5012573, 'discounted_advantage': -920423275.6122394, 'initial_state': 943102528.0, 'diff_eval': 111034.4938831825} step=192000
2025-12-06 11:18.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_192000.d3


Epoch 193/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.88it/s, critic_loss=2.94e+16, actor_loss=-0.00424]


2025-12-06 11:19.01 [info     ] CRR_20251206101312: epoch=193 step=193000 epoch=193 metrics={'time_sample_batch': 0.002021252393722534, 'time_algorithm_update': 0.014119118690490722, 'critic_loss': 2.9389757438555588e+16, 'actor_loss': -0.0041978336572647094, 'time_step': 0.016393628358840943, 'td_error': 2.442811652510195e+16, 'value_scale': 740774673.8038558, 'discounted_advantage': -946727900.0436552, 'initial_state': 965891712.0, 'diff_eval': 111038.76452667416} step=193000
2025-12-06 11:19.01 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_193000.d3


Epoch 194/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.91it/s, critic_loss=3.05e+16, actor_loss=-0.000578]


2025-12-06 11:19.22 [info     ] CRR_20251206101312: epoch=194 step=194000 epoch=194 metrics={'time_sample_batch': 0.002046406030654907, 'time_algorithm_update': 0.013754415988922119, 'critic_loss': 3.05103751175922e+16, 'actor_loss': -0.0005724318623542786, 'time_step': 0.016074039936065675, 'td_error': 2.5664932008229084e+16, 'value_scale': 759503627.6144174, 'discounted_advantage': -965747713.8133578, 'initial_state': 990575552.0, 'diff_eval': 111038.65450417965} step=194000
2025-12-06 11:19.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_194000.d3


Epoch 195/200: 100%|██████████| 1000/1000 [00:15<00:00, 64.15it/s, critic_loss=3.19e+16, actor_loss=-0.000578]


2025-12-06 11:19.43 [info     ] CRR_20251206101312: epoch=195 step=195000 epoch=195 metrics={'time_sample_batch': 0.0018592293262481689, 'time_algorithm_update': 0.013148022413253784, 'critic_loss': 3.197168752168796e+16, 'actor_loss': -0.0005724318623542786, 'time_step': 0.015270991086959838, 'td_error': 2.693534848934929e+16, 'value_scale': 778026301.4383906, 'discounted_advantage': -988178340.9388961, 'initial_state': 1014961088.0, 'diff_eval': 111039.60499213348} step=195000
2025-12-06 11:19.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_195000.d3


Epoch 196/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.94it/s, critic_loss=3.34e+16, actor_loss=-0.000578]


2025-12-06 11:20.04 [info     ] CRR_20251206101312: epoch=196 step=196000 epoch=196 metrics={'time_sample_batch': 0.0023922185897827146, 'time_algorithm_update': 0.014233415365219116, 'critic_loss': 3.3463210345639184e+16, 'actor_loss': -0.0005724318623542786, 'time_step': 0.016910199642181396, 'td_error': 2.8288692454323484e+16, 'value_scale': 797352967.658005, 'discounted_advantage': -1009159784.2193476, 'initial_state': 1040255744.0, 'diff_eval': 111039.90518666833} step=196000
2025-12-06 11:20.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_196000.d3


Epoch 197/200: 100%|██████████| 1000/1000 [00:15<00:00, 62.71it/s, critic_loss=3.52e+16, actor_loss=-0.00443]


2025-12-06 11:20.25 [info     ] CRR_20251206101312: epoch=197 step=197000 epoch=197 metrics={'time_sample_batch': 0.0019426908493041992, 'time_algorithm_update': 0.013380055665969849, 'critic_loss': 3.522098153643835e+16, 'actor_loss': -0.004388644278049469, 'time_step': 0.015608755111694336, 'td_error': 2.965841162478408e+16, 'value_scale': 816255700.0569992, 'discounted_advantage': -1033791827.8301281, 'initial_state': 1065017024.0, 'diff_eval': 111039.24262073194} step=197000
2025-12-06 11:20.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_197000.d3


Epoch 198/200: 100%|██████████| 1000/1000 [00:15<00:00, 63.79it/s, critic_loss=3.74e+16, actor_loss=-0.00116]


2025-12-06 11:20.46 [info     ] CRR_20251206101312: epoch=198 step=198000 epoch=198 metrics={'time_sample_batch': 0.001990849733352661, 'time_algorithm_update': 0.013093192338943481, 'critic_loss': 3.744460306129512e+16, 'actor_loss': -0.0011448637247085571, 'time_step': 0.015342094659805298, 'td_error': 3.1040454397761976e+16, 'value_scale': 834482207.9262364, 'discounted_advantage': -1065850696.708483, 'initial_state': 1089000576.0, 'diff_eval': 111038.4434504417} step=198000
2025-12-06 11:20.46 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_198000.d3


Epoch 199/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.95it/s, critic_loss=3.88e+16, actor_loss=-0.00443]


2025-12-06 11:21.07 [info     ] CRR_20251206101312: epoch=199 step=199000 epoch=199 metrics={'time_sample_batch': 0.0020035510063171388, 'time_algorithm_update': 0.01385337996482849, 'critic_loss': 3.886586427258542e+16, 'actor_loss': -0.004388644278049469, 'time_step': 0.01610317802429199, 'td_error': 3.253598672490224e+16, 'value_scale': 854373562.6823134, 'discounted_advantage': -1088808226.8752356, 'initial_state': 1115219840.0, 'diff_eval': 111037.13298276404} step=199000
2025-12-06 11:21.07 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_199000.d3


Epoch 200/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.75it/s, critic_loss=4.08e+16, actor_loss=-0.000385]


2025-12-06 11:21.28 [info     ] CRR_20251206101312: epoch=200 step=200000 epoch=200 metrics={'time_sample_batch': 0.0018586273193359375, 'time_algorithm_update': 0.013782389879226685, 'critic_loss': 4.07656793250988e+16, 'actor_loss': -0.00038162124156951907, 'time_step': 0.015905971050262452, 'td_error': 3.4093092792254092e+16, 'value_scale': 874483476.7544007, 'discounted_advantage': -1114677448.7218537, 'initial_state': 1141860480.0, 'diff_eval': 111038.67631946446} step=200000
2025-12-06 11:21.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\CRR_20251206101312\model_200000.d3
Training model:  CalQL
2025-12-06 11:21.28 [info     ] dataset info                   dataset_info=DatasetInfo(observation_signature=Signature(dtype=[dtype('float64')], shape=[(7,)]), action_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.CONTINUOUS: 1>, action_size=1)
2025-12-06 11:21.28 [de

Epoch 1/200: 100%|██████████| 1000/1000 [00:47<00:00, 20.89it/s, critic_loss=-50.2, conservative_loss=-53.8, alpha=0.948, actor_loss=-3.27, temp=0.96, temp_loss=0.911]


2025-12-06 11:22.19 [info     ] CalQL_20251206112128: epoch=1 step=1000 epoch=1 metrics={'time_sample_batch': 0.004877896070480347, 'time_algorithm_update': 0.04204398322105408, 'critic_loss': -50.24221371459961, 'conservative_loss': -53.79638618850708, 'alpha': 0.947712759912014, 'actor_loss': -3.282431623339653, 'temp': 0.9593751858472824, 'temp_loss': 0.9086472073495389, 'time_step': 0.04719611668586731, 'td_error': 2.1209309253892923, 'value_scale': 5.818062939577286, 'discounted_advantage': -1.6300197114546973, 'initial_state': 7.5238423347473145, 'diff_eval': 2869.886463591634} step=1000
2025-12-06 11:22.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_1000.d3


Epoch 2/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.35it/s, critic_loss=-52.9, conservative_loss=-57.1, alpha=0.853, actor_loss=-4.81, temp=0.891, temp_loss=0.522]


2025-12-06 11:23.12 [info     ] CalQL_20251206112128: epoch=2 step=2000 epoch=2 metrics={'time_sample_batch': 0.005110318422317505, 'time_algorithm_update': 0.04299753665924072, 'critic_loss': -52.91200329208374, 'conservative_loss': -57.04438134765625, 'alpha': 0.8529770316481591, 'actor_loss': -4.81531733584404, 'temp': 0.8903915401101112, 'temp_loss': 0.5213649101257324, 'time_step': 0.04841613698005676, 'td_error': 2.8700335401839214, 'value_scale': 7.3591552862065335, 'discounted_advantage': -2.33477846795683, 'initial_state': 9.756269454956055, 'diff_eval': 2672.728763845546} step=2000
2025-12-06 11:23.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_2000.d3


Epoch 3/200: 100%|██████████| 1000/1000 [00:46<00:00, 21.51it/s, critic_loss=-48.4, conservative_loss=-53, alpha=0.773, actor_loss=-5.19, temp=0.832, temp_loss=0.367] 


2025-12-06 11:24.02 [info     ] CalQL_20251206112128: epoch=3 step=3000 epoch=3 metrics={'time_sample_batch': 0.004756067752838135, 'time_algorithm_update': 0.04078134155273438, 'critic_loss': -48.35361545562744, 'conservative_loss': -53.00485580825806, 'alpha': 0.773115232527256, 'actor_loss': -5.192120276927948, 'temp': 0.8316244547367095, 'temp_loss': 0.36629130502045154, 'time_step': 0.045824604272842406, 'td_error': 2.871692904454545, 'value_scale': 7.552547699825687, 'discounted_advantage': -2.870805816187386, 'initial_state': 9.783387184143066, 'diff_eval': 2581.5031463894884} step=3000
2025-12-06 11:24.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_3000.d3


Epoch 4/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.44it/s, critic_loss=-43.8, conservative_loss=-48.6, alpha=0.704, actor_loss=-5.22, temp=0.778, temp_loss=0.281]


2025-12-06 11:24.54 [info     ] CalQL_20251206112128: epoch=4 step=4000 epoch=4 metrics={'time_sample_batch': 0.0050785536766052245, 'time_algorithm_update': 0.04286307740211487, 'critic_loss': -43.795963985443116, 'conservative_loss': -48.567595703125, 'alpha': 0.7034711431860924, 'actor_loss': -5.223992444992065, 'temp': 0.777413328230381, 'temp_loss': 0.2805438754633069, 'time_step': 0.048230849504470825, 'td_error': 2.7683821270708884, 'value_scale': 7.677620538922569, 'discounted_advantage': -2.826455267970909, 'initial_state': 10.135870933532715, 'diff_eval': 2522.764361360853} step=4000
2025-12-06 11:24.55 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_4000.d3


Epoch 5/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.53it/s, critic_loss=-39.8, conservative_loss=-44.6, alpha=0.642, actor_loss=-5.17, temp=0.726, temp_loss=0.219]


2025-12-06 11:25.47 [info     ] CalQL_20251206112128: epoch=5 step=5000 epoch=5 metrics={'time_sample_batch': 0.005070085525512695, 'time_algorithm_update': 0.04263201212882996, 'critic_loss': -39.74370253372192, 'conservative_loss': -44.53455649566651, 'alpha': 0.6413205625414848, 'actor_loss': -5.167185557365418, 'temp': 0.7258580771684646, 'temp_loss': 0.2185943142324686, 'time_step': 0.047980765104293825, 'td_error': 2.8681544502125416, 'value_scale': 7.693717869104816, 'discounted_advantage': -3.063792290056351, 'initial_state': 9.992668151855469, 'diff_eval': 2474.813832022962} step=5000
2025-12-06 11:25.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_5000.d3


Epoch 6/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.01it/s, critic_loss=-35.9, conservative_loss=-40.8, alpha=0.585, actor_loss=-5.07, temp=0.678, temp_loss=0.174]


2025-12-06 11:26.40 [info     ] CalQL_20251206112128: epoch=6 step=6000 epoch=6 metrics={'time_sample_batch': 0.005209938287734985, 'time_algorithm_update': 0.04370072078704834, 'critic_loss': -35.89528101730347, 'conservative_loss': -40.737022422790524, 'alpha': 0.5851766675114631, 'actor_loss': -5.066557836532593, 'temp': 0.6774221297502517, 'temp_loss': 0.17365091491956264, 'time_step': 0.04920407557487488, 'td_error': 2.6794462645738237, 'value_scale': 7.345959905769179, 'discounted_advantage': -2.9519628846881805, 'initial_state': 9.471783638000488, 'diff_eval': 2486.4000311937693} step=6000
2025-12-06 11:26.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_6000.d3


Epoch 7/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.55it/s, critic_loss=-32.4, conservative_loss=-37.2, alpha=0.534, actor_loss=-4.95, temp=0.632, temp_loss=0.137]


2025-12-06 11:27.33 [info     ] CalQL_20251206112128: epoch=7 step=7000 epoch=7 metrics={'time_sample_batch': 0.005144121646881103, 'time_algorithm_update': 0.04250975632667541, 'critic_loss': -32.41542908668518, 'conservative_loss': -37.19253509902954, 'alpha': 0.5342439284324646, 'actor_loss': -4.946411080598831, 'temp': 0.6316502040028572, 'temp_loss': 0.13739816805208102, 'time_step': 0.04795594429969788, 'td_error': 2.53188357634556, 'value_scale': 7.024895239824372, 'discounted_advantage': -2.7141084567125993, 'initial_state': 9.128314971923828, 'diff_eval': 2432.7542573944984} step=7000
2025-12-06 11:27.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_7000.d3


Epoch 8/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.16it/s, critic_loss=-29.2, conservative_loss=-34, alpha=0.488, actor_loss=-4.82, temp=0.591, temp_loss=0.103] 


2025-12-06 11:28.26 [info     ] CalQL_20251206112128: epoch=8 step=8000 epoch=8 metrics={'time_sample_batch': 0.00522388768196106, 'time_algorithm_update': 0.04335189294815064, 'critic_loss': -29.21853335380554, 'conservative_loss': -34.025624601364136, 'alpha': 0.48784824830293655, 'actor_loss': -4.816500514268875, 'temp': 0.5905563725829125, 'temp_loss': 0.1027048309603706, 'time_step': 0.04888175868988037, 'td_error': 2.519566943234982, 'value_scale': 6.6747768814898, 'discounted_advantage': -2.756462824354868, 'initial_state': 8.823675155639648, 'diff_eval': 2401.658336705685} step=8000
2025-12-06 11:28.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_8000.d3


Epoch 9/200: 100%|██████████| 1000/1000 [00:50<00:00, 19.82it/s, critic_loss=-26.2, conservative_loss=-31.1, alpha=0.446, actor_loss=-4.76, temp=0.554, temp_loss=0.0766]


2025-12-06 11:29.20 [info     ] CalQL_20251206112128: epoch=9 step=9000 epoch=9 metrics={'time_sample_batch': 0.00536957049369812, 'time_algorithm_update': 0.04402677702903748, 'critic_loss': -26.19195825958252, 'conservative_loss': -31.073365379333495, 'alpha': 0.4455334196686745, 'actor_loss': -4.757882261037826, 'temp': 0.5539438654780388, 'temp_loss': 0.0765853391494602, 'time_step': 0.049689191818237305, 'td_error': 2.6383220828233362, 'value_scale': 7.229324245367946, 'discounted_advantage': -3.2402654884638586, 'initial_state': 8.911416053771973, 'diff_eval': 2374.6028872651764} step=9000
2025-12-06 11:29.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_9000.d3


Epoch 10/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.16it/s, critic_loss=-23.6, conservative_loss=-28.4, alpha=0.407, actor_loss=-4.66, temp=0.522, temp_loss=0.0546]


2025-12-06 11:30.13 [info     ] CalQL_20251206112128: epoch=10 step=10000 epoch=10 metrics={'time_sample_batch': 0.005152557373046875, 'time_algorithm_update': 0.04337245392799378, 'critic_loss': -23.544603675842286, 'conservative_loss': -28.43045135688782, 'alpha': 0.40691317269206045, 'actor_loss': -4.663212603569031, 'temp': 0.5216193000078201, 'temp_loss': 0.054671580207999795, 'time_step': 0.04882699513435364, 'td_error': 2.792123963864499, 'value_scale': 7.154270351517005, 'discounted_advantage': -2.9989390885652925, 'initial_state': 8.880804061889648, 'diff_eval': 2458.9347042826394} step=10000
2025-12-06 11:30.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_10000.d3


Epoch 11/200: 100%|██████████| 1000/1000 [00:52<00:00, 19.20it/s, critic_loss=-21.1, conservative_loss=-25.9, alpha=0.372, actor_loss=-4.62, temp=0.493, temp_loss=0.0433]


2025-12-06 11:31.09 [info     ] CalQL_20251206112128: epoch=11 step=11000 epoch=11 metrics={'time_sample_batch': 0.006921339273452759, 'time_algorithm_update': 0.043964348554611205, 'critic_loss': -21.076114345550536, 'conservative_loss': -25.90007455635071, 'alpha': 0.3716828655302525, 'actor_loss': -4.620056823968888, 'temp': 0.4925222927033901, 'temp_loss': 0.04320731185283512, 'time_step': 0.05119888830184936, 'td_error': 2.729906147695681, 'value_scale': 6.977535964661939, 'discounted_advantage': -3.274546136928416, 'initial_state': 8.412601470947266, 'diff_eval': 2454.9420592851466} step=11000
2025-12-06 11:31.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_11000.d3


Epoch 12/200: 100%|██████████| 1000/1000 [00:50<00:00, 19.68it/s, critic_loss=-18.7, conservative_loss=-23.7, alpha=0.34, actor_loss=-4.61, temp=0.467, temp_loss=0.0295]


2025-12-06 11:32.03 [info     ] CalQL_20251206112128: epoch=12 step=12000 epoch=12 metrics={'time_sample_batch': 0.005132571935653686, 'time_algorithm_update': 0.044587824821472166, 'critic_loss': -18.680087856292726, 'conservative_loss': -23.65051640892029, 'alpha': 0.33952282840013503, 'actor_loss': -4.613639519453049, 'temp': 0.46717666178941725, 'temp_loss': 0.029123007840942593, 'time_step': 0.050031898021697996, 'td_error': 2.659602198599809, 'value_scale': 6.944093784263509, 'discounted_advantage': -3.071076855479014, 'initial_state': 8.488729476928711, 'diff_eval': 2180.464757416686} step=12000
2025-12-06 11:32.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_12000.d3


Epoch 13/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.67it/s, critic_loss=-16.6, conservative_loss=-21.6, alpha=0.31, actor_loss=-4.59, temp=0.447, temp_loss=0.0196]


2025-12-06 11:32.55 [info     ] CalQL_20251206112128: epoch=13 step=13000 epoch=13 metrics={'time_sample_batch': 0.005019392728805542, 'time_algorithm_update': 0.04233177661895752, 'critic_loss': -16.576522227287292, 'conservative_loss': -21.601161138534547, 'alpha': 0.310137225151062, 'actor_loss': -4.591654933214188, 'temp': 0.44719633808732034, 'temp_loss': 0.01961358024261426, 'time_step': 0.04765966176986694, 'td_error': 2.8254208167410626, 'value_scale': 6.864563094406745, 'discounted_advantage': -3.2375078003643973, 'initial_state': 7.92823600769043, 'diff_eval': 2425.091943283518} step=13000
2025-12-06 11:32.55 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_13000.d3


Epoch 14/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.21it/s, critic_loss=-14.6, conservative_loss=-19.7, alpha=0.283, actor_loss=-4.6, temp=0.431, temp_loss=0.0144]


2025-12-06 11:33.48 [info     ] CalQL_20251206112128: epoch=14 step=14000 epoch=14 metrics={'time_sample_batch': 0.005160084247589111, 'time_algorithm_update': 0.04333227849006653, 'critic_loss': -14.551995564460755, 'conservative_loss': -19.678362951278686, 'alpha': 0.28331972646713255, 'actor_loss': -4.5966266288757325, 'temp': 0.4313142459690571, 'temp_loss': 0.01458068348816596, 'time_step': 0.04878032326698303, 'td_error': 2.72428535797872, 'value_scale': 6.7619325589257455, 'discounted_advantage': -3.3289882760189182, 'initial_state': 8.195474624633789, 'diff_eval': 2087.7624760312947} step=14000
2025-12-06 11:33.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_14000.d3


Epoch 15/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.18it/s, critic_loss=-12.7, conservative_loss=-18, alpha=0.259, actor_loss=-4.63, temp=0.418, temp_loss=0.0101]  


2025-12-06 11:34.41 [info     ] CalQL_20251206112128: epoch=15 step=15000 epoch=15 metrics={'time_sample_batch': 0.005192460775375366, 'time_algorithm_update': 0.04330751729011536, 'critic_loss': -12.72017610836029, 'conservative_loss': -17.96592383003235, 'alpha': 0.258826557919383, 'actor_loss': -4.626919656276703, 'temp': 0.4181233030259609, 'temp_loss': 0.010260995043674484, 'time_step': 0.04878800582885742, 'td_error': 2.8635603199675486, 'value_scale': 6.74708250323693, 'discounted_advantage': -3.558135854795728, 'initial_state': 7.596097946166992, 'diff_eval': 2240.5453123336497} step=15000
2025-12-06 11:34.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_15000.d3


Epoch 16/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.50it/s, critic_loss=-11.1, conservative_loss=-16.4, alpha=0.237, actor_loss=-4.66, temp=0.408, temp_loss=0.00702]


2025-12-06 11:35.34 [info     ] CalQL_20251206112128: epoch=16 step=16000 epoch=16 metrics={'time_sample_batch': 0.005075712203979493, 'time_algorithm_update': 0.042670363426208495, 'critic_loss': -11.056061861515046, 'conservative_loss': -16.385540605545042, 'alpha': 0.23644167357683182, 'actor_loss': -4.659224359512329, 'temp': 0.407761708766222, 'temp_loss': 0.007266946586314589, 'time_step': 0.048045601844787594, 'td_error': 2.841189470429501, 'value_scale': 6.9701231335573155, 'discounted_advantage': -3.5410728992505858, 'initial_state': 7.970475673675537, 'diff_eval': 2203.4943967354848} step=16000
2025-12-06 11:35.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_16000.d3


Epoch 17/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.56it/s, critic_loss=-9.45, conservative_loss=-14.9, alpha=0.216, actor_loss=-4.7, temp=0.4, temp_loss=0.00499]  


2025-12-06 11:36.26 [info     ] CalQL_20251206112128: epoch=17 step=17000 epoch=17 metrics={'time_sample_batch': 0.005064418792724609, 'time_algorithm_update': 0.04253652882575989, 'critic_loss': -9.443594073295593, 'conservative_loss': -14.944941835403442, 'alpha': 0.21600505293905736, 'actor_loss': -4.699870559692383, 'temp': 0.3996581400334835, 'temp_loss': 0.004881728300824761, 'time_step': 0.047907519102096556, 'td_error': 2.9678158580256437, 'value_scale': 6.769283080373619, 'discounted_advantage': -3.867184463896, 'initial_state': 7.511504650115967, 'diff_eval': 2081.3921014437606} step=17000
2025-12-06 11:36.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_17000.d3


Epoch 18/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.37it/s, critic_loss=-8.16, conservative_loss=-13.6, alpha=0.197, actor_loss=-4.75, temp=0.394, temp_loss=0.00477] 


2025-12-06 11:37.18 [info     ] CalQL_20251206112128: epoch=18 step=18000 epoch=18 metrics={'time_sample_batch': 0.005080960273742676, 'time_algorithm_update': 0.0429898087978363, 'critic_loss': -8.15001664352417, 'conservative_loss': -13.622809339523316, 'alpha': 0.19733725966513158, 'actor_loss': -4.7491134707927705, 'temp': 0.3941305402517319, 'temp_loss': 0.004990054854424671, 'time_step': 0.04835741782188416, 'td_error': 2.895424645971252, 'value_scale': 6.792243976991434, 'discounted_advantage': -4.147918336248446, 'initial_state': 7.767892837524414, 'diff_eval': 1959.7118454988627} step=18000
2025-12-06 11:37.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_18000.d3


Epoch 19/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.45it/s, critic_loss=-6.65, conservative_loss=-12.4, alpha=0.18, actor_loss=-4.83, temp=0.389, temp_loss=0.0027]  


2025-12-06 11:38.11 [info     ] CalQL_20251206112128: epoch=19 step=19000 epoch=19 metrics={'time_sample_batch': 0.005121773004531861, 'time_algorithm_update': 0.04272290968894959, 'critic_loss': -6.648696875572204, 'conservative_loss': -12.403948865890502, 'alpha': 0.18029153153300284, 'actor_loss': -4.827048614263535, 'temp': 0.3893210053145885, 'temp_loss': 0.0027459002945106475, 'time_step': 0.04814140510559082, 'td_error': 3.0027572294357983, 'value_scale': 7.019464497736282, 'discounted_advantage': -4.03852743487285, 'initial_state': 8.104735374450684, 'diff_eval': 2056.3977279786313} step=19000
2025-12-06 11:38.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_19000.d3


Epoch 20/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.31it/s, critic_loss=-5.44, conservative_loss=-11.3, alpha=0.165, actor_loss=-4.88, temp=0.385, temp_loss=0.00434]


2025-12-06 11:39.04 [info     ] CalQL_20251206112128: epoch=20 step=20000 epoch=20 metrics={'time_sample_batch': 0.0051822237968444824, 'time_algorithm_update': 0.04305467462539673, 'critic_loss': -5.43782806968689, 'conservative_loss': -11.324004261016846, 'alpha': 0.16471616880595685, 'actor_loss': -4.878824564933777, 'temp': 0.38509920075535775, 'temp_loss': 0.004551274595316499, 'time_step': 0.048517225980758666, 'td_error': 3.0632485181729328, 'value_scale': 7.054547545773329, 'discounted_advantage': -4.091100854753853, 'initial_state': 8.055096626281738, 'diff_eval': 1863.7463779834163} step=20000
2025-12-06 11:39.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_20000.d3


Epoch 21/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.46it/s, critic_loss=-4.46, conservative_loss=-10.3, alpha=0.151, actor_loss=-4.96, temp=0.38, temp_loss=0.00423]  


2025-12-06 11:39.56 [info     ] CalQL_20251206112128: epoch=21 step=21000 epoch=21 metrics={'time_sample_batch': 0.00508665132522583, 'time_algorithm_update': 0.042810719013214114, 'critic_loss': -4.455702010631561, 'conservative_loss': -10.320145778656006, 'alpha': 0.15048249703645705, 'actor_loss': -4.964086904048919, 'temp': 0.3798482981026173, 'temp_loss': 0.004369778626831249, 'time_step': 0.04818283414840698, 'td_error': 3.2280630441595464, 'value_scale': 6.737970717894561, 'discounted_advantage': -3.4991167009525745, 'initial_state': 7.018570423126221, 'diff_eval': 1839.5459595320474} step=21000
2025-12-06 11:39.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_21000.d3


Epoch 22/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.25it/s, critic_loss=-3.34, conservative_loss=-9.39, alpha=0.138, actor_loss=-5.02, temp=0.373, temp_loss=0.00589]


2025-12-06 11:40.49 [info     ] CalQL_20251206112128: epoch=22 step=22000 epoch=22 metrics={'time_sample_batch': 0.005110856533050537, 'time_algorithm_update': 0.04318406581878662, 'critic_loss': -3.338032977104187, 'conservative_loss': -9.387305699348449, 'alpha': 0.13749439477920533, 'actor_loss': -5.018664164781571, 'temp': 0.3724725530743599, 'temp_loss': 0.006042081069434061, 'time_step': 0.048603010892868045, 'td_error': 3.0257111553370195, 'value_scale': 7.206555926963087, 'discounted_advantage': -4.127153466733076, 'initial_state': 8.705185890197754, 'diff_eval': 1939.462948225444} step=22000
2025-12-06 11:40.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_22000.d3


Epoch 23/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.46it/s, critic_loss=-2.38, conservative_loss=-8.57, alpha=0.126, actor_loss=-5.15, temp=0.367, temp_loss=0.0024]


2025-12-06 11:41.42 [info     ] CalQL_20251206112128: epoch=23 step=23000 epoch=23 metrics={'time_sample_batch': 0.005133151531219482, 'time_algorithm_update': 0.042715898752212524, 'critic_loss': -2.368934157371521, 'conservative_loss': -8.568096122264862, 'alpha': 0.12562457881867886, 'actor_loss': -5.14848551774025, 'temp': 0.36653277602791784, 'temp_loss': 0.002550098515348509, 'time_step': 0.048141076803207394, 'td_error': 3.165262239145901, 'value_scale': 7.223603555659412, 'discounted_advantage': -4.5834402040983155, 'initial_state': 8.515790939331055, 'diff_eval': 1875.8165131375729} step=23000
2025-12-06 11:41.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_23000.d3


Epoch 24/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.74it/s, critic_loss=-1.46, conservative_loss=-7.8, alpha=0.115, actor_loss=-5.26, temp=0.364, temp_loss=0.00367] 


2025-12-06 11:42.33 [info     ] CalQL_20251206112128: epoch=24 step=24000 epoch=24 metrics={'time_sample_batch': 0.005053240537643433, 'time_algorithm_update': 0.04219688367843628, 'critic_loss': -1.456535011768341, 'conservative_loss': -7.798771025657654, 'alpha': 0.11477654939889907, 'actor_loss': -5.268045904159546, 'temp': 0.36365918672084807, 'temp_loss': 0.003966142123565078, 'time_step': 0.04753784656524658, 'td_error': 3.2913636934598456, 'value_scale': 7.615788103677317, 'discounted_advantage': -4.529767225069266, 'initial_state': 8.97060775756836, 'diff_eval': 1820.0385861284033} step=24000
2025-12-06 11:42.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_24000.d3


Epoch 25/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.68it/s, critic_loss=-0.52, conservative_loss=-7.1, alpha=0.105, actor_loss=-5.36, temp=0.358, temp_loss=0.00359] 


2025-12-06 11:43.25 [info     ] CalQL_20251206112128: epoch=25 step=25000 epoch=25 metrics={'time_sample_batch': 0.0050824198722839355, 'time_algorithm_update': 0.04229622197151184, 'critic_loss': -0.5178092107772827, 'conservative_loss': -7.101316256523132, 'alpha': 0.10487037002295256, 'actor_loss': -5.361442912578583, 'temp': 0.35823256596922876, 'temp_loss': 0.0036400594502920287, 'time_step': 0.047669466018676755, 'td_error': 3.699020822327524, 'value_scale': 7.762005562824568, 'discounted_advantage': -4.815781773513649, 'initial_state': 8.274792671203613, 'diff_eval': 2011.8642578822314} step=25000
2025-12-06 11:43.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_25000.d3


Epoch 26/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.30it/s, critic_loss=0.454, conservative_loss=-6.47, alpha=0.0959, actor_loss=-5.46, temp=0.352, temp_loss=0.00611]  


2025-12-06 11:44.18 [info     ] CalQL_20251206112128: epoch=26 step=26000 epoch=26 metrics={'time_sample_batch': 0.00517911171913147, 'time_algorithm_update': 0.04308103084564209, 'critic_loss': 0.4552359700202942, 'conservative_loss': -6.464282508373261, 'alpha': 0.09582254561036825, 'actor_loss': -5.455597017288208, 'temp': 0.352411435931921, 'temp_loss': 0.006203532074345276, 'time_step': 0.048552916049957276, 'td_error': 3.4543013689805453, 'value_scale': 7.364253173308246, 'discounted_advantage': -4.6126011302754275, 'initial_state': 8.705756187438965, 'diff_eval': 2083.4975775103635} step=26000
2025-12-06 11:44.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_26000.d3


Epoch 27/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.27it/s, critic_loss=1.26, conservative_loss=-5.89, alpha=0.0876, actor_loss=-5.57, temp=0.346, temp_loss=0.00322]  


2025-12-06 11:45.11 [info     ] CalQL_20251206112128: epoch=27 step=27000 epoch=27 metrics={'time_sample_batch': 0.005150936841964722, 'time_algorithm_update': 0.043163458824157716, 'critic_loss': 1.264105531692505, 'conservative_loss': -5.890257734298706, 'alpha': 0.08755697382241487, 'actor_loss': -5.574021871566773, 'temp': 0.3462211569845676, 'temp_loss': 0.0030475509256357326, 'time_step': 0.048611329317092895, 'td_error': 3.523169461595009, 'value_scale': 7.602300755877894, 'discounted_advantage': -5.102230971826984, 'initial_state': 8.999992370605469, 'diff_eval': 1894.805620121433} step=27000
2025-12-06 11:45.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_27000.d3


Epoch 28/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.42it/s, critic_loss=1.96, conservative_loss=-5.37, alpha=0.08, actor_loss=-5.67, temp=0.343, temp_loss=0.00343]  


2025-12-06 11:46.04 [info     ] CalQL_20251206112128: epoch=28 step=28000 epoch=28 metrics={'time_sample_batch': 0.00513854193687439, 'time_algorithm_update': 0.04282232928276062, 'critic_loss': 1.9690920062065125, 'conservative_loss': -5.365946294784546, 'alpha': 0.07999859394133091, 'actor_loss': -5.670123066425323, 'temp': 0.3424838296175003, 'temp_loss': 0.0034791255390737206, 'time_step': 0.048255769729614255, 'td_error': 3.716554234584101, 'value_scale': 7.768971415697179, 'discounted_advantage': -4.930112151218038, 'initial_state': 9.191691398620605, 'diff_eval': 1796.6394121766064} step=28000
2025-12-06 11:46.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_28000.d3


Epoch 29/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.35it/s, critic_loss=2.82, conservative_loss=-4.88, alpha=0.0731, actor_loss=-5.76, temp=0.338, temp_loss=0.00447]


2025-12-06 11:46.56 [info     ] CalQL_20251206112128: epoch=29 step=29000 epoch=29 metrics={'time_sample_batch': 0.005103520631790161, 'time_algorithm_update': 0.042966517210006716, 'critic_loss': 2.8283017008304596, 'conservative_loss': -4.880387970924377, 'alpha': 0.07309774618595838, 'actor_loss': -5.764974987983703, 'temp': 0.3375136432349682, 'temp_loss': 0.0045128026412567124, 'time_step': 0.04838295435905456, 'td_error': 3.8303368077417344, 'value_scale': 7.6174921671136255, 'discounted_advantage': -5.760730394503788, 'initial_state': 9.059938430786133, 'diff_eval': 2066.1630565405903} step=29000
2025-12-06 11:46.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_29000.d3


Epoch 30/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.34it/s, critic_loss=3.4, conservative_loss=-4.45, alpha=0.0668, actor_loss=-5.87, temp=0.334, temp_loss=0.000786] 


2025-12-06 11:47.49 [info     ] CalQL_20251206112128: epoch=30 step=30000 epoch=30 metrics={'time_sample_batch': 0.005115634441375733, 'time_algorithm_update': 0.04301114201545715, 'critic_loss': 3.3986299922466277, 'conservative_loss': -4.452528839111328, 'alpha': 0.06679011385887862, 'actor_loss': -5.865974583148956, 'temp': 0.3340317488312721, 'temp_loss': 0.0004053789225872606, 'time_step': 0.0484207227230072, 'td_error': 4.158033757201413, 'value_scale': 7.960681335716403, 'discounted_advantage': -5.227410768624208, 'initial_state': 8.515661239624023, 'diff_eval': 1821.0297313381711} step=30000
2025-12-06 11:47.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_30000.d3


Epoch 31/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.18it/s, critic_loss=4.17, conservative_loss=-4.05, alpha=0.0611, actor_loss=-5.93, temp=0.331, temp_loss=0.00418]


2025-12-06 11:48.42 [info     ] CalQL_20251206112128: epoch=31 step=31000 epoch=31 metrics={'time_sample_batch': 0.005166121006011963, 'time_algorithm_update': 0.04335993528366089, 'critic_loss': 4.164243905782699, 'conservative_loss': -4.050215173244476, 'alpha': 0.06102661057561636, 'actor_loss': -5.928322031974792, 'temp': 0.3307600205242634, 'temp_loss': 0.004327412830782123, 'time_step': 0.048829885959625244, 'td_error': 4.181761899183892, 'value_scale': 7.879161811569567, 'discounted_advantage': -5.7735681483660795, 'initial_state': 8.585762977600098, 'diff_eval': 1865.4637560783967} step=31000
2025-12-06 11:48.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_31000.d3


Epoch 32/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.19it/s, critic_loss=4.72, conservative_loss=-3.69, alpha=0.0558, actor_loss=-5.99, temp=0.327, temp_loss=0.00195]


2025-12-06 11:49.36 [info     ] CalQL_20251206112128: epoch=32 step=32000 epoch=32 metrics={'time_sample_batch': 0.0051562881469726565, 'time_algorithm_update': 0.043331156730651854, 'critic_loss': 4.714262169122696, 'conservative_loss': -3.687063033103943, 'alpha': 0.05576377402245998, 'actor_loss': -5.986296577453613, 'temp': 0.32740317305922506, 'temp_loss': 0.0017085269208764657, 'time_step': 0.04878059411048889, 'td_error': 4.270695549802646, 'value_scale': 7.79792653600711, 'discounted_advantage': -5.529567221537233, 'initial_state': 8.856046676635742, 'diff_eval': 1902.7089814644632} step=32000
2025-12-06 11:49.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_32000.d3


Epoch 33/200: 100%|██████████| 1000/1000 [00:50<00:00, 19.77it/s, critic_loss=5.26, conservative_loss=-3.36, alpha=0.051, actor_loss=-6.05, temp=0.325, temp_loss=0.0018]  


2025-12-06 11:50.30 [info     ] CalQL_20251206112128: epoch=33 step=33000 epoch=33 metrics={'time_sample_batch': 0.005489369869232178, 'time_algorithm_update': 0.04404986095428467, 'critic_loss': 5.251479135751724, 'conservative_loss': -3.3632771186828614, 'alpha': 0.05094945044443011, 'actor_loss': -6.046641589164734, 'temp': 0.3252880319952965, 'temp_loss': 0.0015500460093608127, 'time_step': 0.04983733654022217, 'td_error': 4.348152348715428, 'value_scale': 8.126666665301258, 'discounted_advantage': -5.961090817434994, 'initial_state': 9.319104194641113, 'diff_eval': 1896.7425830281993} step=33000
2025-12-06 11:50.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_33000.d3


Epoch 34/200: 100%|██████████| 1000/1000 [00:50<00:00, 19.90it/s, critic_loss=5.71, conservative_loss=-3.06, alpha=0.0466, actor_loss=-6.08, temp=0.323, temp_loss=0.00233]


2025-12-06 11:51.23 [info     ] CalQL_20251206112128: epoch=34 step=34000 epoch=34 metrics={'time_sample_batch': 0.0052340452671051025, 'time_algorithm_update': 0.04388755035400391, 'critic_loss': 5.725744513988495, 'conservative_loss': -3.0615396947860716, 'alpha': 0.046552802126854656, 'actor_loss': -6.080417419910431, 'temp': 0.3228210764229298, 'temp_loss': 0.0023104300819104537, 'time_step': 0.04943289828300476, 'td_error': 4.61252231250208, 'value_scale': 8.420095285055698, 'discounted_advantage': -5.978073296685007, 'initial_state': 8.82075023651123, 'diff_eval': 2064.1223771251007} step=34000
2025-12-06 11:51.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_34000.d3


Epoch 35/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.38it/s, critic_loss=6.21, conservative_loss=-2.79, alpha=0.0426, actor_loss=-6.13, temp=0.319, temp_loss=0.00302]


2025-12-06 11:52.16 [info     ] CalQL_20251206112128: epoch=35 step=35000 epoch=35 metrics={'time_sample_batch': 0.005108495950698852, 'time_algorithm_update': 0.04292053985595703, 'critic_loss': 6.2018341019153596, 'conservative_loss': -2.790051359653473, 'alpha': 0.042535461988300084, 'actor_loss': -6.128671131134033, 'temp': 0.31945890334248545, 'temp_loss': 0.00298556600662414, 'time_step': 0.04832331538200378, 'td_error': 4.56387520450383, 'value_scale': 8.463411546740852, 'discounted_advantage': -5.572022060352333, 'initial_state': 9.288743019104004, 'diff_eval': 2221.4737476212917} step=35000
2025-12-06 11:52.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_35000.d3


Epoch 36/200: 100%|██████████| 1000/1000 [00:50<00:00, 19.85it/s, critic_loss=6.54, conservative_loss=-2.54, alpha=0.0389, actor_loss=-6.18, temp=0.316, temp_loss=0.00193]


2025-12-06 11:53.10 [info     ] CalQL_20251206112128: epoch=36 step=36000 epoch=36 metrics={'time_sample_batch': 0.005299250602722168, 'time_algorithm_update': 0.043989496707916256, 'critic_loss': 6.551296122550965, 'conservative_loss': -2.5422357246875764, 'alpha': 0.03886436343565583, 'actor_loss': -6.175076159000397, 'temp': 0.31573873910307887, 'temp_loss': 0.0019110555972438306, 'time_step': 0.049577706575393676, 'td_error': 4.343463099518408, 'value_scale': 8.234372444269743, 'discounted_advantage': -5.43437696973227, 'initial_state': 10.624340057373047, 'diff_eval': 1960.3033230877547} step=36000
2025-12-06 11:53.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_36000.d3


Epoch 37/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.64it/s, critic_loss=6.95, conservative_loss=-2.31, alpha=0.0355, actor_loss=-6.21, temp=0.313, temp_loss=0.00454]


2025-12-06 11:54.02 [info     ] CalQL_20251206112128: epoch=37 step=37000 epoch=37 metrics={'time_sample_batch': 0.005039753913879394, 'time_algorithm_update': 0.042424387216567994, 'critic_loss': 6.960944669246674, 'conservative_loss': -2.3102062997817994, 'alpha': 0.03551123339310289, 'actor_loss': -6.2117827353477475, 'temp': 0.3126487841308117, 'temp_loss': 0.004719776903395541, 'time_step': 0.047756585121154785, 'td_error': 4.5112355936921364, 'value_scale': 8.30344879782227, 'discounted_advantage': -5.5733585241928845, 'initial_state': 9.961376190185547, 'diff_eval': 1999.4534747308403} step=37000
2025-12-06 11:54.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_37000.d3


Epoch 38/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.61it/s, critic_loss=7.3, conservative_loss=-2.11, alpha=0.0325, actor_loss=-6.26, temp=0.307, temp_loss=0.00384]


2025-12-06 11:54.54 [info     ] CalQL_20251206112128: epoch=38 step=38000 epoch=38 metrics={'time_sample_batch': 0.0050681445598602295, 'time_algorithm_update': 0.04246414303779602, 'critic_loss': 7.293853166103363, 'conservative_loss': -2.105593099236488, 'alpha': 0.03244919639267027, 'actor_loss': -6.257778700351715, 'temp': 0.30689024582505225, 'temp_loss': 0.004031832555076107, 'time_step': 0.04781838703155518, 'td_error': 4.71711349921788, 'value_scale': 8.408134345976169, 'discounted_advantage': -6.0606549186556125, 'initial_state': 10.242326736450195, 'diff_eval': 1963.6415838948787} step=38000
2025-12-06 11:54.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_38000.d3


Epoch 39/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.62it/s, critic_loss=7.64, conservative_loss=-1.92, alpha=0.0297, actor_loss=-6.34, temp=0.303, temp_loss=0.00246]


2025-12-06 11:55.46 [info     ] CalQL_20251206112128: epoch=39 step=39000 epoch=39 metrics={'time_sample_batch': 0.005058837652206421, 'time_algorithm_update': 0.042415611743927, 'critic_loss': 7.6302142460346225, 'conservative_loss': -1.918670628786087, 'alpha': 0.029648784367367627, 'actor_loss': -6.343468783855438, 'temp': 0.30325615391135213, 'temp_loss': 0.0024814934926107525, 'time_step': 0.047773826360702515, 'td_error': 4.887823654301705, 'value_scale': 8.425640658462967, 'discounted_advantage': -6.22836472979422, 'initial_state': 9.616517066955566, 'diff_eval': 2051.527695111474} step=39000
2025-12-06 11:55.46 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_39000.d3


Epoch 40/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.37it/s, critic_loss=7.85, conservative_loss=-1.74, alpha=0.0271, actor_loss=-6.4, temp=0.3, temp_loss=0.00363]   


2025-12-06 11:56.39 [info     ] CalQL_20251206112128: epoch=40 step=40000 epoch=40 metrics={'time_sample_batch': 0.005146904468536377, 'time_algorithm_update': 0.04297231721878052, 'critic_loss': 7.8695080658197405, 'conservative_loss': -1.743717547774315, 'alpha': 0.027092122957110405, 'actor_loss': -6.3986923828125, 'temp': 0.2995824427306652, 'temp_loss': 0.0036395536561030896, 'time_step': 0.04840791893005371, 'td_error': 5.138967162978795, 'value_scale': 8.410008887181876, 'discounted_advantage': -6.471658113807411, 'initial_state': 9.470767974853516, 'diff_eval': 1974.9366809886315} step=40000
2025-12-06 11:56.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_40000.d3


Epoch 41/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.66it/s, critic_loss=8.31, conservative_loss=-1.59, alpha=0.0248, actor_loss=-6.5, temp=0.296, temp_loss=0.00284]  


2025-12-06 11:57.31 [info     ] CalQL_20251206112128: epoch=41 step=41000 epoch=41 metrics={'time_sample_batch': 0.0050443470478057865, 'time_algorithm_update': 0.042401601552963256, 'critic_loss': 8.311866316080094, 'conservative_loss': -1.5862737489938736, 'alpha': 0.02475735712237656, 'actor_loss': -6.497930859088898, 'temp': 0.295872673869133, 'temp_loss': 0.0029136944646015765, 'time_step': 0.04773169636726379, 'td_error': 5.026836741788039, 'value_scale': 8.83477596297067, 'discounted_advantage': -6.309249927829343, 'initial_state': 10.065998077392578, 'diff_eval': 2313.498642373586} step=41000
2025-12-06 11:57.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_41000.d3


Epoch 42/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.36it/s, critic_loss=8.56, conservative_loss=-1.44, alpha=0.0226, actor_loss=-6.61, temp=0.292, temp_loss=0.00314]


2025-12-06 11:58.24 [info     ] CalQL_20251206112128: epoch=42 step=42000 epoch=42 metrics={'time_sample_batch': 0.005082303047180175, 'time_algorithm_update': 0.043023784399032594, 'critic_loss': 8.550307754516602, 'conservative_loss': -1.444307535171509, 'alpha': 0.022623737482354044, 'actor_loss': -6.613072824954987, 'temp': 0.2917926534116268, 'temp_loss': 0.0030771586060291158, 'time_step': 0.048393011808395385, 'td_error': 4.9704184850343, 'value_scale': 8.823011608216646, 'discounted_advantage': -6.710280208236494, 'initial_state': 10.622295379638672, 'diff_eval': 2327.156918587718} step=42000
2025-12-06 11:58.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_42000.d3


Epoch 43/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.81it/s, critic_loss=8.92, conservative_loss=-1.32, alpha=0.0207, actor_loss=-6.76, temp=0.29, temp_loss=0.000663]


2025-12-06 11:59.16 [info     ] CalQL_20251206112128: epoch=43 step=43000 epoch=43 metrics={'time_sample_batch': 0.0049851837158203125, 'time_algorithm_update': 0.04207787680625916, 'critic_loss': 8.919666212320328, 'conservative_loss': -1.315370866060257, 'alpha': 0.020672888230532406, 'actor_loss': -6.756576839447021, 'temp': 0.2898073124885559, 'temp_loss': 0.00044016355695202945, 'time_step': 0.047348086833953855, 'td_error': 5.048644882765439, 'value_scale': 9.220285184920296, 'discounted_advantage': -6.9593521615259375, 'initial_state': 11.170258522033691, 'diff_eval': 2340.097010297512} step=43000
2025-12-06 11:59.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_43000.d3


Epoch 44/200: 100%|██████████| 1000/1000 [00:52<00:00, 19.12it/s, critic_loss=9.49, conservative_loss=-1.2, alpha=0.0189, actor_loss=-6.92, temp=0.289, temp_loss=0.00181]


2025-12-06 12:00.12 [info     ] CalQL_20251206112128: epoch=44 step=44000 epoch=44 metrics={'time_sample_batch': 0.005170531034469604, 'time_algorithm_update': 0.04527219843864441, 'critic_loss': 9.487462849617005, 'conservative_loss': -1.199461176276207, 'alpha': 0.018888765471056102, 'actor_loss': -6.915275348186493, 'temp': 0.2886667321324348, 'temp_loss': 0.0019123405513819308, 'time_step': 0.05149606657028198, 'td_error': 5.1881929144152314, 'value_scale': 9.309463567455664, 'discounted_advantage': -6.468790496133652, 'initial_state': 11.129894256591797, 'diff_eval': 2273.808590005572} step=44000
2025-12-06 12:00.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_44000.d3


Epoch 45/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.18it/s, critic_loss=9.7, conservative_loss=-1.09, alpha=0.0173, actor_loss=-7.09, temp=0.286, temp_loss=0.00255]


2025-12-06 12:01.05 [info     ] CalQL_20251206112128: epoch=45 step=45000 epoch=45 metrics={'time_sample_batch': 0.005335997104644776, 'time_algorithm_update': 0.04313382601737976, 'critic_loss': 9.688304092407227, 'conservative_loss': -1.0908965649604798, 'alpha': 0.017258454974740742, 'actor_loss': -7.086656387805939, 'temp': 0.28610026156902313, 'temp_loss': 0.0028538957341806962, 'time_step': 0.04877981448173523, 'td_error': 5.224398554345372, 'value_scale': 9.544761658897537, 'discounted_advantage': -6.986013623394305, 'initial_state': 11.370246887207031, 'diff_eval': 2182.0008861729093} step=45000
2025-12-06 12:01.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_45000.d3


Epoch 46/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.78it/s, critic_loss=10, conservative_loss=-0.993, alpha=0.0158, actor_loss=-7.24, temp=0.284, temp_loss=0.00173]  


2025-12-06 12:01.56 [info     ] CalQL_20251206112128: epoch=46 step=46000 epoch=46 metrics={'time_sample_batch': 0.005022549867630005, 'time_algorithm_update': 0.042114883422851565, 'critic_loss': 10.053460297346115, 'conservative_loss': -0.9923773849010468, 'alpha': 0.01577120337355882, 'actor_loss': -7.235447901725769, 'temp': 0.2836461521089077, 'temp_loss': 0.0014175332189770415, 'time_step': 0.04743975949287414, 'td_error': 5.811103259674455, 'value_scale': 9.582920962129622, 'discounted_advantage': -7.679987487412995, 'initial_state': 10.259986877441406, 'diff_eval': 2310.006227187879} step=46000
2025-12-06 12:01.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_46000.d3


Epoch 47/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.58it/s, critic_loss=10.4, conservative_loss=-0.904, alpha=0.0144, actor_loss=-7.43, temp=0.283, temp_loss=-0.00058]


2025-12-06 12:02.48 [info     ] CalQL_20251206112128: epoch=47 step=47000 epoch=47 metrics={'time_sample_batch': 0.005066529750823974, 'time_algorithm_update': 0.042521889925003054, 'critic_loss': 10.39038952922821, 'conservative_loss': -0.9041018075346947, 'alpha': 0.014411500815302134, 'actor_loss': -7.430795255184173, 'temp': 0.2833971500992775, 'temp_loss': -0.000618247541715391, 'time_step': 0.04787687563896179, 'td_error': 5.67810278860243, 'value_scale': 10.271505453738587, 'discounted_advantage': -8.330520531336923, 'initial_state': 11.977094650268555, 'diff_eval': 2375.462975256927} step=47000
2025-12-06 12:02.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_47000.d3


Epoch 48/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.69it/s, critic_loss=10.9, conservative_loss=-0.82, alpha=0.0132, actor_loss=-7.61, temp=0.282, temp_loss=0.00359]  


2025-12-06 12:03.40 [info     ] CalQL_20251206112128: epoch=48 step=48000 epoch=48 metrics={'time_sample_batch': 0.005060646295547485, 'time_algorithm_update': 0.042265260219573976, 'critic_loss': 10.89889577150345, 'conservative_loss': -0.8197736060023307, 'alpha': 0.013169896118342876, 'actor_loss': -7.609551331996918, 'temp': 0.2819390850961208, 'temp_loss': 0.0036662860314827413, 'time_step': 0.047628418922424316, 'td_error': 6.122656923414101, 'value_scale': 10.650236956201017, 'discounted_advantage': -9.395151994392586, 'initial_state': 12.4314603805542, 'diff_eval': 2448.28566407804} step=48000
2025-12-06 12:03.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_48000.d3


Epoch 49/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.56it/s, critic_loss=11.2, conservative_loss=-0.746, alpha=0.012, actor_loss=-7.81, temp=0.28, temp_loss=-0.00142]  


2025-12-06 12:04.32 [info     ] CalQL_20251206112128: epoch=49 step=49000 epoch=49 metrics={'time_sample_batch': 0.005073676586151123, 'time_algorithm_update': 0.04255064249038696, 'critic_loss': 11.243981028079986, 'conservative_loss': -0.7456701251864434, 'alpha': 0.012036935630254448, 'actor_loss': -7.814610778808594, 'temp': 0.28017469683289525, 'temp_loss': -0.0015139885823009535, 'time_step': 0.04792323660850525, 'td_error': 6.749487704777457, 'value_scale': 10.823890674918616, 'discounted_advantage': -8.202858532926463, 'initial_state': 11.14749526977539, 'diff_eval': 2363.1472731622407} step=49000
2025-12-06 12:04.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_49000.d3


Epoch 50/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.52it/s, critic_loss=11.5, conservative_loss=-0.677, alpha=0.011, actor_loss=-8, temp=0.281, temp_loss=0.00052]     


2025-12-06 12:05.25 [info     ] CalQL_20251206112128: epoch=50 step=50000 epoch=50 metrics={'time_sample_batch': 0.005044691324234009, 'time_algorithm_update': 0.0426393506526947, 'critic_loss': 11.470938578128814, 'conservative_loss': -0.6766927074193955, 'alpha': 0.011001327235251665, 'actor_loss': -8.001632976055145, 'temp': 0.2805349362194538, 'temp_loss': 0.0005385097005637363, 'time_step': 0.04798551464080811, 'td_error': 6.66666948624682, 'value_scale': 11.22000744897983, 'discounted_advantage': -9.219368386405423, 'initial_state': 12.494741439819336, 'diff_eval': 2355.779253122387} step=50000
2025-12-06 12:05.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_50000.d3


Epoch 51/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.48it/s, critic_loss=12.3, conservative_loss=-0.614, alpha=0.0101, actor_loss=-8.24, temp=0.281, temp_loss=0.000388]


2025-12-06 12:06.17 [info     ] CalQL_20251206112128: epoch=51 step=51000 epoch=51 metrics={'time_sample_batch': 0.0050778288841247556, 'time_algorithm_update': 0.0427570059299469, 'critic_loss': 12.247194399356841, 'conservative_loss': -0.6138400293588638, 'alpha': 0.010055731829255819, 'actor_loss': -8.238347382545472, 'temp': 0.28095547860860826, 'temp_loss': 0.0006192298821406438, 'time_step': 0.04812415480613708, 'td_error': 6.722839580313382, 'value_scale': 12.161400833067992, 'discounted_advantage': -10.254494142477826, 'initial_state': 14.606803894042969, 'diff_eval': 2415.715371523616} step=51000
2025-12-06 12:06.17 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_51000.d3


Epoch 52/200: 100%|██████████| 1000/1000 [00:47<00:00, 20.98it/s, critic_loss=12.7, conservative_loss=-0.56, alpha=0.00919, actor_loss=-8.46, temp=0.279, temp_loss=0.000463]


2025-12-06 12:07.08 [info     ] CalQL_20251206112128: epoch=52 step=52000 epoch=52 metrics={'time_sample_batch': 0.004938534021377563, 'time_algorithm_update': 0.04174657583236694, 'critic_loss': 12.658465446949005, 'conservative_loss': -0.559603166103363, 'alpha': 0.009190910870209337, 'actor_loss': -8.462474153995514, 'temp': 0.27904301169514656, 'temp_loss': 0.0005184233202598988, 'time_step': 0.0469703209400177, 'td_error': 7.585937507328134, 'value_scale': 12.207374008172579, 'discounted_advantage': -9.578944338618099, 'initial_state': 12.672119140625, 'diff_eval': 2494.0677814475166} step=52000
2025-12-06 12:07.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_52000.d3


Epoch 53/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.24it/s, critic_loss=13.3, conservative_loss=-0.51, alpha=0.0084, actor_loss=-8.73, temp=0.28, temp_loss=-0.00214]   


2025-12-06 12:08.02 [info     ] CalQL_20251206112128: epoch=53 step=53000 epoch=53 metrics={'time_sample_batch': 0.005046828031539917, 'time_algorithm_update': 0.043303265810012814, 'critic_loss': 13.349145764827728, 'conservative_loss': -0.5094975782334804, 'alpha': 0.008398518000729382, 'actor_loss': -8.730553051948547, 'temp': 0.2799707051217556, 'temp_loss': -0.002163198600988835, 'time_step': 0.04864161992073059, 'td_error': 7.311972703912157, 'value_scale': 12.46264236538272, 'discounted_advantage': -10.350891930135536, 'initial_state': 14.275080680847168, 'diff_eval': 2473.465424596756} step=53000
2025-12-06 12:08.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_53000.d3


Epoch 54/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.50it/s, critic_loss=14.1, conservative_loss=-0.461, alpha=0.00768, actor_loss=-8.94, temp=0.282, temp_loss=0.000249]


2025-12-06 12:08.54 [info     ] CalQL_20251206112128: epoch=54 step=54000 epoch=54 metrics={'time_sample_batch': 0.005046568632125855, 'time_algorithm_update': 0.042697216033935545, 'critic_loss': 14.046749162197113, 'conservative_loss': -0.460890554100275, 'alpha': 0.007675687882117927, 'actor_loss': -8.939677948474884, 'temp': 0.2820704967677593, 'temp_loss': 0.000290639384300448, 'time_step': 0.04804169940948486, 'td_error': 7.6229408706967385, 'value_scale': 12.653057578186077, 'discounted_advantage': -11.284420499160994, 'initial_state': 14.607662200927734, 'diff_eval': 2570.9938276050957} step=54000
2025-12-06 12:08.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_54000.d3


Epoch 55/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.75it/s, critic_loss=14.5, conservative_loss=-0.419, alpha=0.00702, actor_loss=-9.13, temp=0.282, temp_loss=-0.00195]


2025-12-06 12:09.46 [info     ] CalQL_20251206112128: epoch=55 step=55000 epoch=55 metrics={'time_sample_batch': 0.005008284091949463, 'time_algorithm_update': 0.04219976115226746, 'critic_loss': 14.501986444950104, 'conservative_loss': -0.4191595772206783, 'alpha': 0.007016538180410862, 'actor_loss': -9.12663962507248, 'temp': 0.28225112503767014, 'temp_loss': -0.0020317948579322547, 'time_step': 0.04750332522392273, 'td_error': 8.044729825218358, 'value_scale': 12.706580355035083, 'discounted_advantage': -10.800778995457252, 'initial_state': 14.105661392211914, 'diff_eval': 2515.1494134980126} step=55000
2025-12-06 12:09.46 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_55000.d3


Epoch 56/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.48it/s, critic_loss=15.1, conservative_loss=-0.381, alpha=0.00642, actor_loss=-9.29, temp=0.284, temp_loss=-0.000307]


2025-12-06 12:10.38 [info     ] CalQL_20251206112128: epoch=56 step=56000 epoch=56 metrics={'time_sample_batch': 0.00501164984703064, 'time_algorithm_update': 0.04283094024658203, 'critic_loss': 15.057905346393586, 'conservative_loss': -0.38120555061101913, 'alpha': 0.006412643365096301, 'actor_loss': -9.289280764579773, 'temp': 0.2843408424556255, 'temp_loss': -0.00019802775559946896, 'time_step': 0.04812801933288574, 'td_error': 8.535550509679547, 'value_scale': 13.050301113806443, 'discounted_advantage': -11.13919685881187, 'initial_state': 13.690576553344727, 'diff_eval': 2719.6947989444643} step=56000
2025-12-06 12:10.38 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_56000.d3


Epoch 57/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.30it/s, critic_loss=15.3, conservative_loss=-0.347, alpha=0.00586, actor_loss=-9.28, temp=0.283, temp_loss=0.00175]


2025-12-06 12:11.31 [info     ] CalQL_20251206112128: epoch=57 step=57000 epoch=57 metrics={'time_sample_batch': 0.005169823408126831, 'time_algorithm_update': 0.043092836380004884, 'critic_loss': 15.296926355361938, 'conservative_loss': -0.3468477137386799, 'alpha': 0.005860663410741836, 'actor_loss': -9.281691299915314, 'temp': 0.28341988506913185, 'temp_loss': 0.001700906979269348, 'time_step': 0.04855042719841003, 'td_error': 7.918112992903818, 'value_scale': 13.334826407739417, 'discounted_advantage': -11.298547826118782, 'initial_state': 16.330299377441406, 'diff_eval': 2754.006755661018} step=57000
2025-12-06 12:11.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_57000.d3


Epoch 58/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.71it/s, critic_loss=15.4, conservative_loss=-0.316, alpha=0.00536, actor_loss=-9.26, temp=0.282, temp_loss=0.0013]  


2025-12-06 12:12.23 [info     ] CalQL_20251206112128: epoch=58 step=58000 epoch=58 metrics={'time_sample_batch': 0.0049813477993011475, 'time_algorithm_update': 0.04228007054328919, 'critic_loss': 15.438811913490296, 'conservative_loss': -0.316249629765749, 'alpha': 0.005356140690390021, 'actor_loss': -9.255208319664002, 'temp': 0.281974144756794, 'temp_loss': 0.001361428764415905, 'time_step': 0.04756358218193054, 'td_error': 8.48386451808396, 'value_scale': 12.686245268678382, 'discounted_advantage': -10.68670551624347, 'initial_state': 14.503314971923828, 'diff_eval': 2600.8174245967853} step=58000
2025-12-06 12:12.23 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_58000.d3


Epoch 59/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.73it/s, critic_loss=15.7, conservative_loss=-0.289, alpha=0.0049, actor_loss=-9.11, temp=0.278, temp_loss=0.00293]


2025-12-06 12:13.14 [info     ] CalQL_20251206112128: epoch=59 step=59000 epoch=59 metrics={'time_sample_batch': 0.005022351741790772, 'time_algorithm_update': 0.04225451350212097, 'critic_loss': 15.698899155139923, 'conservative_loss': -0.2883926374912262, 'alpha': 0.004894086332526058, 'actor_loss': -9.107245157241822, 'temp': 0.27832173031568525, 'temp_loss': 0.0031459610305028035, 'time_step': 0.0475649094581604, 'td_error': 8.306704078441467, 'value_scale': 12.173775802991742, 'discounted_advantage': -9.763510414611877, 'initial_state': 14.324593544006348, 'diff_eval': 2642.19589027564} step=59000
2025-12-06 12:13.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_59000.d3


Epoch 60/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.27it/s, critic_loss=15.5, conservative_loss=-0.263, alpha=0.00447, actor_loss=-8.89, temp=0.275, temp_loss=0.00242]


2025-12-06 12:14.07 [info     ] CalQL_20251206112128: epoch=60 step=60000 epoch=60 metrics={'time_sample_batch': 0.005186746597290039, 'time_algorithm_update': 0.04306825017929077, 'critic_loss': 15.483024219036102, 'conservative_loss': -0.2628764000982046, 'alpha': 0.0044709990601986644, 'actor_loss': -8.894341167449952, 'temp': 0.27481369554996493, 'temp_loss': 0.0024951413091039284, 'time_step': 0.04857109951972961, 'td_error': 7.724225562874311, 'value_scale': 12.35974463507961, 'discounted_advantage': -10.377098442678209, 'initial_state': 16.560400009155273, 'diff_eval': 2893.712031325723} step=60000
2025-12-06 12:14.07 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_60000.d3


Epoch 61/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.66it/s, critic_loss=15.5, conservative_loss=-0.24, alpha=0.00409, actor_loss=-8.71, temp=0.273, temp_loss=0.00155]  


2025-12-06 12:14.59 [info     ] CalQL_20251206112128: epoch=61 step=61000 epoch=61 metrics={'time_sample_batch': 0.005033520936965942, 'time_algorithm_update': 0.04237553548812866, 'critic_loss': 15.466779828071594, 'conservative_loss': -0.24007259196043015, 'alpha': 0.004084785250946879, 'actor_loss': -8.71357373714447, 'temp': 0.2732523396909237, 'temp_loss': 0.0015883110949071124, 'time_step': 0.04769086933135986, 'td_error': 7.647850130596445, 'value_scale': 11.5746274530694, 'discounted_advantage': -9.385192086278632, 'initial_state': 14.957076072692871, 'diff_eval': 2795.999095357279} step=61000
2025-12-06 12:14.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_61000.d3


Epoch 62/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.59it/s, critic_loss=14.9, conservative_loss=-0.219, alpha=0.00373, actor_loss=-8.52, temp=0.271, temp_loss=0.00196] 


2025-12-06 12:15.51 [info     ] CalQL_20251206112128: epoch=62 step=62000 epoch=62 metrics={'time_sample_batch': 0.005132888317108154, 'time_algorithm_update': 0.04243814969062805, 'critic_loss': 14.914158523082733, 'conservative_loss': -0.2189746677726507, 'alpha': 0.0037317754386458546, 'actor_loss': -8.522203712463378, 'temp': 0.27095857056975364, 'temp_loss': 0.002039042360265739, 'time_step': 0.04785500311851502, 'td_error': 7.794409249271599, 'value_scale': 11.582286763950332, 'discounted_advantage': -8.642507108308818, 'initial_state': 13.362883567810059, 'diff_eval': 2704.6564249407143} step=62000
2025-12-06 12:15.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_62000.d3


Epoch 63/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.55it/s, critic_loss=14.8, conservative_loss=-0.2, alpha=0.00341, actor_loss=-8.35, temp=0.267, temp_loss=0.00346] 


2025-12-06 12:16.43 [info     ] CalQL_20251206112128: epoch=63 step=63000 epoch=63 metrics={'time_sample_batch': 0.005088143110275269, 'time_algorithm_update': 0.04258957099914551, 'critic_loss': 14.753191435337067, 'conservative_loss': -0.19979084721207618, 'alpha': 0.003409275196027011, 'actor_loss': -8.353439536571504, 'temp': 0.2674560759663582, 'temp_loss': 0.003466579666011967, 'time_step': 0.04796870398521423, 'td_error': 7.221422598111628, 'value_scale': 11.352267008284162, 'discounted_advantage': -9.072076757393477, 'initial_state': 14.995954513549805, 'diff_eval': 3032.3370584586883} step=63000
2025-12-06 12:16.44 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_63000.d3


Epoch 64/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.64it/s, critic_loss=14.3, conservative_loss=-0.183, alpha=0.00312, actor_loss=-8.22, temp=0.266, temp_loss=0.00223] 


2025-12-06 12:17.35 [info     ] CalQL_20251206112128: epoch=64 step=64000 epoch=64 metrics={'time_sample_batch': 0.005042693376541138, 'time_algorithm_update': 0.0424123547077179, 'critic_loss': 14.30667424249649, 'conservative_loss': -0.18299213141202927, 'alpha': 0.0031139604351483286, 'actor_loss': -8.217214186191558, 'temp': 0.26629786917567255, 'temp_loss': 0.0023243399279890583, 'time_step': 0.04775787997245789, 'td_error': 7.044491429493581, 'value_scale': 10.619212998886033, 'discounted_advantage': -8.425088552628901, 'initial_state': 14.092141151428223, 'diff_eval': 2870.333904401546} step=64000
2025-12-06 12:17.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_64000.d3


Epoch 65/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.15it/s, critic_loss=13.9, conservative_loss=-0.167, alpha=0.00285, actor_loss=-8.09, temp=0.262, temp_loss=0.00357] 


2025-12-06 12:18.29 [info     ] CalQL_20251206112128: epoch=65 step=65000 epoch=65 metrics={'time_sample_batch': 0.004947432041168213, 'time_algorithm_update': 0.043304250478744505, 'critic_loss': 13.928152542114258, 'conservative_loss': -0.16709242361783982, 'alpha': 0.002844585941405967, 'actor_loss': -8.087638309001923, 'temp': 0.26158898705244066, 'temp_loss': 0.003523878484382294, 'time_step': 0.04889901328086853, 'td_error': 6.829612099184471, 'value_scale': 10.368208737817515, 'discounted_advantage': -8.180372628579548, 'initial_state': 13.73147201538086, 'diff_eval': 2804.54778937635} step=65000
2025-12-06 12:18.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_65000.d3


Epoch 66/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.11it/s, critic_loss=13.8, conservative_loss=-0.153, alpha=0.0026, actor_loss=-8.02, temp=0.258, temp_loss=0.00423]


2025-12-06 12:19.22 [info     ] CalQL_20251206112128: epoch=66 step=66000 epoch=66 metrics={'time_sample_batch': 0.005085011005401611, 'time_algorithm_update': 0.04362281632423401, 'critic_loss': 13.827921376228332, 'conservative_loss': -0.15291186894476413, 'alpha': 0.0025980382857378574, 'actor_loss': -8.020254392147065, 'temp': 0.2576376799941063, 'temp_loss': 0.004335077390656807, 'time_step': 0.0490154275894165, 'td_error': 7.151356099059022, 'value_scale': 10.533735271699005, 'discounted_advantage': -8.123129991869273, 'initial_state': 13.283832550048828, 'diff_eval': 2868.408982572153} step=66000
2025-12-06 12:19.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_66000.d3


Epoch 67/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.71it/s, critic_loss=13.7, conservative_loss=-0.139, alpha=0.00237, actor_loss=-7.95, temp=0.253, temp_loss=0.0039]  


2025-12-06 12:20.14 [info     ] CalQL_20251206112128: epoch=67 step=67000 epoch=67 metrics={'time_sample_batch': 0.004977112293243408, 'time_algorithm_update': 0.042294399976730346, 'critic_loss': 13.69699741268158, 'conservative_loss': -0.1387833117246628, 'alpha': 0.002373580942628905, 'actor_loss': -7.950460304260254, 'temp': 0.25288314774632453, 'temp_loss': 0.0039626285072881726, 'time_step': 0.04757536554336548, 'td_error': 6.659574207654406, 'value_scale': 10.117028154788752, 'discounted_advantage': -8.040285279786724, 'initial_state': 12.831582069396973, 'diff_eval': 2978.160656019526} step=67000
2025-12-06 12:20.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_67000.d3


Epoch 68/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.41it/s, critic_loss=13.5, conservative_loss=-0.127, alpha=0.00217, actor_loss=-7.97, temp=0.249, temp_loss=0.00303]


2025-12-06 12:21.06 [info     ] CalQL_20251206112128: epoch=68 step=68000 epoch=68 metrics={'time_sample_batch': 0.0051168277263641355, 'time_algorithm_update': 0.04287346982955933, 'critic_loss': 13.495340876102448, 'conservative_loss': -0.1270999843031168, 'alpha': 0.0021683777472935616, 'actor_loss': -7.969450350761414, 'temp': 0.2492502307742834, 'temp_loss': 0.0029642998162889853, 'time_step': 0.04827017784118652, 'td_error': 6.757742995855738, 'value_scale': 10.346528100033664, 'discounted_advantage': -7.990431347897891, 'initial_state': 13.225811004638672, 'diff_eval': 2996.550422488925} step=68000
2025-12-06 12:21.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_68000.d3


Epoch 69/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.48it/s, critic_loss=13.3, conservative_loss=-0.117, alpha=0.00198, actor_loss=-7.88, temp=0.245, temp_loss=0.00177]


2025-12-06 12:21.59 [info     ] CalQL_20251206112128: epoch=69 step=69000 epoch=69 metrics={'time_sample_batch': 0.004975357532501221, 'time_algorithm_update': 0.04286068511009216, 'critic_loss': 13.28049585533142, 'conservative_loss': -0.11666524059325456, 'alpha': 0.0019805740468436853, 'actor_loss': -7.881754081726074, 'temp': 0.24548415677249433, 'temp_loss': 0.0017020925987744704, 'time_step': 0.04812650179862976, 'td_error': 6.13068182913164, 'value_scale': 9.970579155545122, 'discounted_advantage': -6.882476485599869, 'initial_state': 13.445755958557129, 'diff_eval': 2850.624318454796} step=69000
2025-12-06 12:21.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_69000.d3


Epoch 70/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.68it/s, critic_loss=13.1, conservative_loss=-0.106, alpha=0.00181, actor_loss=-7.9, temp=0.243, temp_loss=0.00408]


2025-12-06 12:22.51 [info     ] CalQL_20251206112128: epoch=70 step=70000 epoch=70 metrics={'time_sample_batch': 0.00502622127532959, 'time_algorithm_update': 0.042326852083206176, 'critic_loss': 13.111534509897233, 'conservative_loss': -0.10634847215563059, 'alpha': 0.0018086902368813754, 'actor_loss': -7.894645313739777, 'temp': 0.24251633821427823, 'temp_loss': 0.004003916038665921, 'time_step': 0.047640682458877565, 'td_error': 6.403513009275903, 'value_scale': 9.827547146430534, 'discounted_advantage': -8.110744266801905, 'initial_state': 13.704782485961914, 'diff_eval': 2963.1717810316586} step=70000
2025-12-06 12:22.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_70000.d3


Epoch 71/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.70it/s, critic_loss=12.8, conservative_loss=-0.0973, alpha=0.00165, actor_loss=-7.87, temp=0.239, temp_loss=0.0037] 


2025-12-06 12:23.42 [info     ] CalQL_20251206112128: epoch=71 step=71000 epoch=71 metrics={'time_sample_batch': 0.005042687654495239, 'time_algorithm_update': 0.042269987106323244, 'critic_loss': 12.780909901857376, 'conservative_loss': -0.09723023782670498, 'alpha': 0.0016520442529581487, 'actor_loss': -7.865327991962433, 'temp': 0.23932993765175342, 'temp_loss': 0.0038938793542329223, 'time_step': 0.0476025927066803, 'td_error': 6.2780712898904865, 'value_scale': 9.762106426921653, 'discounted_advantage': -8.281710941699002, 'initial_state': 13.232006072998047, 'diff_eval': 2974.9126359265915} step=71000
2025-12-06 12:23.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_71000.d3


Epoch 72/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.50it/s, critic_loss=12.9, conservative_loss=-0.0889, alpha=0.00151, actor_loss=-7.89, temp=0.236, temp_loss=0.00148] 


2025-12-06 12:24.35 [info     ] CalQL_20251206112128: epoch=72 step=72000 epoch=72 metrics={'time_sample_batch': 0.005073024034500122, 'time_algorithm_update': 0.04270750761032104, 'critic_loss': 12.906284278392791, 'conservative_loss': -0.08886076633632183, 'alpha': 0.00150898994167801, 'actor_loss': -7.886867606163025, 'temp': 0.23620645880699156, 'temp_loss': 0.0016445508329197764, 'time_step': 0.048071603059768674, 'td_error': 6.758408276682173, 'value_scale': 9.606128104002982, 'discounted_advantage': -5.977767397896358, 'initial_state': 11.986550331115723, 'diff_eval': 2867.0798573012776} step=72000
2025-12-06 12:24.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_72000.d3


Epoch 73/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.79it/s, critic_loss=12.9, conservative_loss=-0.0809, alpha=0.00138, actor_loss=-7.92, temp=0.23, temp_loss=0.00694]


2025-12-06 12:25.26 [info     ] CalQL_20251206112128: epoch=73 step=73000 epoch=73 metrics={'time_sample_batch': 0.005010124444961548, 'time_algorithm_update': 0.042132465839385985, 'critic_loss': 12.923836228370666, 'conservative_loss': -0.08087538109719754, 'alpha': 0.0013786044204607607, 'actor_loss': -7.915574873924255, 'temp': 0.23002828115224838, 'temp_loss': 0.006828215529443696, 'time_step': 0.04740814733505249, 'td_error': 6.325706685606578, 'value_scale': 9.74222050049777, 'discounted_advantage': -8.20721348192398, 'initial_state': 11.915875434875488, 'diff_eval': 3055.0192670638567} step=73000
2025-12-06 12:25.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_73000.d3


Epoch 74/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.70it/s, critic_loss=12.8, conservative_loss=-0.0741, alpha=0.00126, actor_loss=-7.97, temp=0.228, temp_loss=0.000443]


2025-12-06 12:26.18 [info     ] CalQL_20251206112128: epoch=74 step=74000 epoch=74 metrics={'time_sample_batch': 0.005054724454879761, 'time_algorithm_update': 0.04223852872848511, 'critic_loss': 12.762741237163544, 'conservative_loss': -0.07409593329578638, 'alpha': 0.0012591964665334672, 'actor_loss': -7.972379549980164, 'temp': 0.22751365162432194, 'temp_loss': 0.0004915484833763913, 'time_step': 0.047589954853057864, 'td_error': 6.7633734449690035, 'value_scale': 10.135443955923728, 'discounted_advantage': -8.084750458507806, 'initial_state': 12.052392959594727, 'diff_eval': 2973.346855574856} step=74000
2025-12-06 12:26.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_74000.d3


Epoch 75/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.15it/s, critic_loss=12.9, conservative_loss=-0.0675, alpha=0.00115, actor_loss=-8.04, temp=0.227, temp_loss=0.00192] 


2025-12-06 12:27.11 [info     ] CalQL_20251206112128: epoch=75 step=75000 epoch=75 metrics={'time_sample_batch': 0.005204373359680175, 'time_algorithm_update': 0.043332122087478636, 'critic_loss': 12.91331395149231, 'conservative_loss': -0.06751234473660589, 'alpha': 0.00115023323148489, 'actor_loss': -8.04459106874466, 'temp': 0.22688258638978004, 'temp_loss': 0.0018190678366227075, 'time_step': 0.04884496378898621, 'td_error': 6.325566724549382, 'value_scale': 10.840344349457903, 'discounted_advantage': -8.083756050869123, 'initial_state': 13.838550567626953, 'diff_eval': 2945.0762694271857} step=75000
2025-12-06 12:27.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_75000.d3


Epoch 76/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.05it/s, critic_loss=12.8, conservative_loss=-0.0615, alpha=0.00105, actor_loss=-8.11, temp=0.223, temp_loss=0.00321]


2025-12-06 12:28.05 [info     ] CalQL_20251206112128: epoch=76 step=76000 epoch=76 metrics={'time_sample_batch': 0.0052183189392089845, 'time_algorithm_update': 0.04354767870903015, 'critic_loss': 12.811309659481049, 'conservative_loss': -0.061494829021394255, 'alpha': 0.001050960317486897, 'actor_loss': -8.113568148136139, 'temp': 0.22269317972660066, 'temp_loss': 0.0033043320215074344, 'time_step': 0.0490776002407074, 'td_error': 6.3618549927635994, 'value_scale': 9.955544206011675, 'discounted_advantage': -8.09979663007974, 'initial_state': 13.462250709533691, 'diff_eval': 3033.2435057358903} step=76000
2025-12-06 12:28.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_76000.d3


Epoch 77/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.62it/s, critic_loss=12.9, conservative_loss=-0.0563, alpha=0.00096, actor_loss=-8.14, temp=0.221, temp_loss=0.000594]


2025-12-06 12:28.57 [info     ] CalQL_20251206112128: epoch=77 step=77000 epoch=77 metrics={'time_sample_batch': 0.005088286161422729, 'time_algorithm_update': 0.04235559749603272, 'critic_loss': 12.84772916841507, 'conservative_loss': -0.056277480639517305, 'alpha': 0.0009600402491050773, 'actor_loss': -8.144914368152618, 'temp': 0.22083052864670755, 'temp_loss': 0.0006130881356075406, 'time_step': 0.0477401442527771, 'td_error': 6.4413557071134635, 'value_scale': 9.987238863519371, 'discounted_advantage': -8.009359915397555, 'initial_state': 12.989394187927246, 'diff_eval': 3108.106299829703} step=77000
2025-12-06 12:28.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_77000.d3


Epoch 78/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.24it/s, critic_loss=12.8, conservative_loss=-0.0513, alpha=0.000877, actor_loss=-8.18, temp=0.221, temp_loss=-0.00126]


2025-12-06 12:29.51 [info     ] CalQL_20251206112128: epoch=78 step=78000 epoch=78 metrics={'time_sample_batch': 0.005064188003540039, 'time_algorithm_update': 0.043337575912475584, 'critic_loss': 12.799795342922211, 'conservative_loss': -0.051290572732686994, 'alpha': 0.0008770330910920166, 'actor_loss': -8.178962236881256, 'temp': 0.2212211436033249, 'temp_loss': -0.0010061547452351078, 'time_step': 0.048692986011505124, 'td_error': 6.5108963262014425, 'value_scale': 10.02686580405348, 'discounted_advantage': -7.821129425470405, 'initial_state': 12.997160911560059, 'diff_eval': 3020.2934865810053} step=78000
2025-12-06 12:29.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_78000.d3


Epoch 79/200: 100%|██████████| 1000/1000 [00:50<00:00, 20.00it/s, critic_loss=12.8, conservative_loss=-0.0468, alpha=0.000802, actor_loss=-8.29, temp=0.22, temp_loss=0.00348] 


2025-12-06 12:30.44 [info     ] CalQL_20251206112128: epoch=79 step=79000 epoch=79 metrics={'time_sample_batch': 0.0050942664146423336, 'time_algorithm_update': 0.04377909374237061, 'critic_loss': 12.847908796787262, 'conservative_loss': -0.046766986537724736, 'alpha': 0.000801222396781668, 'actor_loss': -8.285194042205811, 'temp': 0.22012515057623386, 'temp_loss': 0.003512584695359692, 'time_step': 0.0491817135810852, 'td_error': 6.244154309738758, 'value_scale': 10.238011106751916, 'discounted_advantage': -8.020524350810444, 'initial_state': 14.458626747131348, 'diff_eval': 3000.6350591461064} step=79000
2025-12-06 12:30.45 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_79000.d3


Epoch 80/200: 100%|██████████| 1000/1000 [00:47<00:00, 20.92it/s, critic_loss=13.2, conservative_loss=-0.0426, alpha=0.000732, actor_loss=-8.35, temp=0.217, temp_loss=0.00274] 


2025-12-06 12:31.36 [info     ] CalQL_20251206112128: epoch=80 step=80000 epoch=80 metrics={'time_sample_batch': 0.004925641059875488, 'time_algorithm_update': 0.04187061357498169, 'critic_loss': 13.185951422691344, 'conservative_loss': -0.04258957926928997, 'alpha': 0.0007319458082783967, 'actor_loss': -8.351987264156342, 'temp': 0.21709310306608676, 'temp_loss': 0.002819348171236925, 'time_step': 0.04709197235107422, 'td_error': 6.383876669324575, 'value_scale': 10.231694546138613, 'discounted_advantage': -7.547088892032301, 'initial_state': 13.581369400024414, 'diff_eval': 3129.8643892056534} step=80000
2025-12-06 12:31.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_80000.d3


Epoch 81/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.73it/s, critic_loss=13, conservative_loss=-0.0388, alpha=0.000669, actor_loss=-8.4, temp=0.213, temp_loss=0.00355]  


2025-12-06 12:32.28 [info     ] CalQL_20251206112128: epoch=81 step=81000 epoch=81 metrics={'time_sample_batch': 0.005009083747863769, 'time_algorithm_update': 0.042211743116378785, 'critic_loss': 12.968695869922637, 'conservative_loss': -0.03877513790130615, 'alpha': 0.0006688825525925495, 'actor_loss': -8.402453455924988, 'temp': 0.21312937918305397, 'temp_loss': 0.0036324329229537396, 'time_step': 0.04751841282844543, 'td_error': 6.733508382184804, 'value_scale': 10.19145930141729, 'discounted_advantage': -7.704503507266717, 'initial_state': 12.162054061889648, 'diff_eval': 3156.6097217904226} step=81000
2025-12-06 12:32.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_81000.d3


Epoch 82/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.39it/s, critic_loss=13.1, conservative_loss=-0.0354, alpha=0.000611, actor_loss=-8.53, temp=0.211, temp_loss=0.00111] 


2025-12-06 12:33.20 [info     ] CalQL_20251206112128: epoch=82 step=82000 epoch=82 metrics={'time_sample_batch': 0.0051255011558532714, 'time_algorithm_update': 0.04290339016914368, 'critic_loss': 13.104481759548188, 'conservative_loss': -0.03539068160019815, 'alpha': 0.0006111273391288705, 'actor_loss': -8.535552050113678, 'temp': 0.21103070576488972, 'temp_loss': 0.0009196656458079815, 'time_step': 0.048322608470916746, 'td_error': 6.195519035651107, 'value_scale': 10.198854297951444, 'discounted_advantage': -7.719910958894161, 'initial_state': 13.960880279541016, 'diff_eval': 3081.1069185238416} step=82000
2025-12-06 12:33.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_82000.d3


Epoch 83/200: 100%|██████████| 1000/1000 [00:47<00:00, 20.85it/s, critic_loss=13.2, conservative_loss=-0.0323, alpha=0.000559, actor_loss=-8.59, temp=0.207, temp_loss=0.00492]


2025-12-06 12:34.12 [info     ] CalQL_20251206112128: epoch=83 step=83000 epoch=83 metrics={'time_sample_batch': 0.005007707595825196, 'time_algorithm_update': 0.04198748016357422, 'critic_loss': 13.228084305286407, 'conservative_loss': -0.03228627554140985, 'alpha': 0.0005583412015112116, 'actor_loss': -8.587236624717713, 'temp': 0.2068945789784193, 'temp_loss': 0.004921870696940459, 'time_step': 0.04726688194274902, 'td_error': 6.450055825854041, 'value_scale': 10.214205115775938, 'discounted_advantage': -8.292426741472019, 'initial_state': 13.428433418273926, 'diff_eval': 3135.4871043782996} step=83000
2025-12-06 12:34.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_83000.d3


Epoch 84/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.33it/s, critic_loss=13.3, conservative_loss=-0.0295, alpha=0.00051, actor_loss=-8.72, temp=0.204, temp_loss=0.00172]  


2025-12-06 12:35.04 [info     ] CalQL_20251206112128: epoch=84 step=84000 epoch=84 metrics={'time_sample_batch': 0.005132824659347534, 'time_algorithm_update': 0.04303687286376953, 'critic_loss': 13.289906711101532, 'conservative_loss': -0.029443990206345916, 'alpha': 0.0005101012640225236, 'actor_loss': -8.720640534877777, 'temp': 0.20386917351186276, 'temp_loss': 0.0016262896549887956, 'time_step': 0.04845719599723816, 'td_error': 6.287585699450465, 'value_scale': 10.090919715627928, 'discounted_advantage': -8.455450069908656, 'initial_state': 13.555744171142578, 'diff_eval': 3066.6754817532183} step=84000
2025-12-06 12:35.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_84000.d3


Epoch 85/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.49it/s, critic_loss=13.5, conservative_loss=-0.0268, alpha=0.000466, actor_loss=-8.87, temp=0.203, temp_loss=0.000608]


2025-12-06 12:35.57 [info     ] CalQL_20251206112128: epoch=85 step=85000 epoch=85 metrics={'time_sample_batch': 0.005082435131072998, 'time_algorithm_update': 0.04268330979347229, 'critic_loss': 13.512072889328003, 'conservative_loss': -0.02684041148982942, 'alpha': 0.0004660364383016713, 'actor_loss': -8.871282047271729, 'temp': 0.20295353220403195, 'temp_loss': 0.0006441906106192619, 'time_step': 0.04807291030883789, 'td_error': 6.384583571070366, 'value_scale': 10.293143322864626, 'discounted_advantage': -8.297990065453446, 'initial_state': 13.022470474243164, 'diff_eval': 3213.6571216901048} step=85000
2025-12-06 12:35.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_85000.d3


Epoch 86/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.55it/s, critic_loss=13.3, conservative_loss=-0.0244, alpha=0.000426, actor_loss=-9, temp=0.202, temp_loss=0.000883]   


2025-12-06 12:36.49 [info     ] CalQL_20251206112128: epoch=86 step=86000 epoch=86 metrics={'time_sample_batch': 0.005153284072875976, 'time_algorithm_update': 0.042513098716735837, 'critic_loss': 13.311539407253266, 'conservative_loss': -0.024377168025821447, 'alpha': 0.00042582620290340856, 'actor_loss': -8.999253616333007, 'temp': 0.20182412359118462, 'temp_loss': 0.0009067563681164757, 'time_step': 0.047958478927612305, 'td_error': 6.60551541992488, 'value_scale': 9.832616079923419, 'discounted_advantage': -7.12194005562947, 'initial_state': 12.449592590332031, 'diff_eval': 3101.0598506557717} step=86000
2025-12-06 12:36.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_86000.d3


Epoch 87/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.12it/s, critic_loss=13.9, conservative_loss=-0.0222, alpha=0.000389, actor_loss=-9.15, temp=0.201, temp_loss=0.0006]  


2025-12-06 12:37.42 [info     ] CalQL_20251206112128: epoch=87 step=87000 epoch=87 metrics={'time_sample_batch': 0.005476431608200073, 'time_algorithm_update': 0.04315992212295532, 'critic_loss': 13.854750806331635, 'conservative_loss': -0.022232677415013315, 'alpha': 0.00038912045958568344, 'actor_loss': -9.157175696849823, 'temp': 0.20113362300395965, 'temp_loss': 0.0007521494949469343, 'time_step': 0.048942500114440915, 'td_error': 6.659445998376964, 'value_scale': 10.724009163095323, 'discounted_advantage': -8.180617977783818, 'initial_state': 14.443380355834961, 'diff_eval': 3229.7088460368755} step=87000
2025-12-06 12:37.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_87000.d3


Epoch 88/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.17it/s, critic_loss=14.1, conservative_loss=-0.0202, alpha=0.000356, actor_loss=-9.26, temp=0.199, temp_loss=0.00117]


2025-12-06 12:38.35 [info     ] CalQL_20251206112128: epoch=88 step=88000 epoch=88 metrics={'time_sample_batch': 0.005090349197387696, 'time_algorithm_update': 0.04349510669708252, 'critic_loss': 14.126251408576966, 'conservative_loss': -0.020193658236414194, 'alpha': 0.0003555914833850693, 'actor_loss': -9.265777173042297, 'temp': 0.19882990171015263, 'temp_loss': 0.0012137001376831904, 'time_step': 0.048875309228897094, 'td_error': 6.655677733505713, 'value_scale': 10.054640092486508, 'discounted_advantage': -9.341017389052585, 'initial_state': 14.848337173461914, 'diff_eval': 3231.9531437601227} step=88000
2025-12-06 12:38.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_88000.d3


Epoch 89/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.12it/s, critic_loss=14.1, conservative_loss=-0.0183, alpha=0.000325, actor_loss=-9.36, temp=0.197, temp_loss=0.00304] 


2025-12-06 12:39.29 [info     ] CalQL_20251206112128: epoch=89 step=89000 epoch=89 metrics={'time_sample_batch': 0.005181861400604248, 'time_algorithm_update': 0.04342702913284302, 'critic_loss': 14.123231582164765, 'conservative_loss': -0.01826255481503904, 'alpha': 0.000325023553421488, 'actor_loss': -9.36159517288208, 'temp': 0.19729246972501277, 'temp_loss': 0.0030044389293761922, 'time_step': 0.04890747880935669, 'td_error': 6.612712448303494, 'value_scale': 10.417900704998095, 'discounted_advantage': -8.12932044584739, 'initial_state': 12.812150955200195, 'diff_eval': 3152.0364844807514} step=89000
2025-12-06 12:39.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_89000.d3


Epoch 90/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.61it/s, critic_loss=14.4, conservative_loss=-0.0167, alpha=0.000297, actor_loss=-9.45, temp=0.195, temp_loss=-0.00012]


2025-12-06 12:40.21 [info     ] CalQL_20251206112128: epoch=90 step=90000 epoch=90 metrics={'time_sample_batch': 0.005010587930679321, 'time_algorithm_update': 0.042464298009872434, 'critic_loss': 14.417020265579223, 'conservative_loss': -0.01673733557201922, 'alpha': 0.0002970253140083514, 'actor_loss': -9.450603815078736, 'temp': 0.19511974701285362, 'temp_loss': -6.301154382526875e-06, 'time_step': 0.04776659631729126, 'td_error': 6.566008992477532, 'value_scale': 10.43569509529689, 'discounted_advantage': -8.57169791064355, 'initial_state': 14.213937759399414, 'diff_eval': 3221.681824688749} step=90000
2025-12-06 12:40.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_90000.d3


Epoch 91/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.78it/s, critic_loss=14.5, conservative_loss=-0.0152, alpha=0.000272, actor_loss=-9.64, temp=0.196, temp_loss=-0.000798]


2025-12-06 12:41.12 [info     ] CalQL_20251206112128: epoch=91 step=91000 epoch=91 metrics={'time_sample_batch': 0.005020436525344849, 'time_algorithm_update': 0.04211870288848877, 'critic_loss': 14.491089440822602, 'conservative_loss': -0.015218052008189261, 'alpha': 0.0002713996311358642, 'actor_loss': -9.63742293548584, 'temp': 0.1958363047838211, 'temp_loss': -0.0007891555142123252, 'time_step': 0.047418349027633665, 'td_error': 6.7460959112187435, 'value_scale': 10.359877379864457, 'discounted_advantage': -8.282556573719372, 'initial_state': 13.263208389282227, 'diff_eval': 3254.485270709822} step=91000
2025-12-06 12:41.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_91000.d3


Epoch 92/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.44it/s, critic_loss=14.5, conservative_loss=-0.0138, alpha=0.000248, actor_loss=-9.75, temp=0.195, temp_loss=0.0022] 


2025-12-06 12:42.05 [info     ] CalQL_20251206112128: epoch=92 step=92000 epoch=92 metrics={'time_sample_batch': 0.005056809663772583, 'time_algorithm_update': 0.04287146711349487, 'critic_loss': 14.511464468955994, 'conservative_loss': -0.01383692427072674, 'alpha': 0.0002480278297298355, 'actor_loss': -9.751264420509338, 'temp': 0.19507065732777118, 'temp_loss': 0.0020622299703536556, 'time_step': 0.04822735023498535, 'td_error': 6.654686820362235, 'value_scale': 10.232791613590297, 'discounted_advantage': -7.749526020539338, 'initial_state': 13.449965476989746, 'diff_eval': 3287.604842123376} step=92000
2025-12-06 12:42.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_92000.d3


Epoch 93/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.80it/s, critic_loss=15, conservative_loss=-0.0126, alpha=0.000227, actor_loss=-9.83, temp=0.195, temp_loss=-0.0017]   


2025-12-06 12:42.56 [info     ] CalQL_20251206112128: epoch=93 step=93000 epoch=93 metrics={'time_sample_batch': 0.004977125406265259, 'time_algorithm_update': 0.04209503674507141, 'critic_loss': 14.968882289409638, 'conservative_loss': -0.012627516260370612, 'alpha': 0.00022663517408363988, 'actor_loss': -9.834035955429076, 'temp': 0.19502585846185685, 'temp_loss': -0.0015982586810132488, 'time_step': 0.04735482954978943, 'td_error': 6.599679960305392, 'value_scale': 10.487811179885488, 'discounted_advantage': -8.473193179525094, 'initial_state': 13.445884704589844, 'diff_eval': 3322.9192784495913} step=93000
2025-12-06 12:42.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_93000.d3


Epoch 94/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.16it/s, critic_loss=15.1, conservative_loss=-0.0115, alpha=0.000207, actor_loss=-9.87, temp=0.196, temp_loss=-0.000477]


2025-12-06 12:43.50 [info     ] CalQL_20251206112128: epoch=94 step=94000 epoch=94 metrics={'time_sample_batch': 0.00516029143333435, 'time_algorithm_update': 0.04340195870399475, 'critic_loss': 15.075324936389922, 'conservative_loss': -0.011479089521802962, 'alpha': 0.00020707575748383532, 'actor_loss': -9.872390865325928, 'temp': 0.19629153847694397, 'temp_loss': -0.00030126548733096567, 'time_step': 0.0488637490272522, 'td_error': 6.782663741848553, 'value_scale': 10.187017514413318, 'discounted_advantage': -7.926405339506924, 'initial_state': 13.346762657165527, 'diff_eval': 3469.779026826688} step=94000
2025-12-06 12:43.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_94000.d3


Epoch 95/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.49it/s, critic_loss=15.2, conservative_loss=-0.0104, alpha=0.000189, actor_loss=-9.97, temp=0.194, temp_loss=0.00383]


2025-12-06 12:44.42 [info     ] CalQL_20251206112128: epoch=95 step=95000 epoch=95 metrics={'time_sample_batch': 0.005107765674591065, 'time_algorithm_update': 0.04272894525527954, 'critic_loss': 15.231390357971192, 'conservative_loss': -0.010359357789158821, 'alpha': 0.00018929853657027708, 'actor_loss': -9.970348063945771, 'temp': 0.19352276754379272, 'temp_loss': 0.0036667176440823824, 'time_step': 0.04812194895744324, 'td_error': 6.912838189496419, 'value_scale': 10.863326398192063, 'discounted_advantage': -8.256228824131705, 'initial_state': 14.092663764953613, 'diff_eval': 3241.056143009414} step=95000
2025-12-06 12:44.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_95000.d3


Epoch 96/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.59it/s, critic_loss=15.4, conservative_loss=-0.0095, alpha=0.000173, actor_loss=-10, temp=0.193, temp_loss=-0.0014]    


2025-12-06 12:45.34 [info     ] CalQL_20251206112128: epoch=96 step=96000 epoch=96 metrics={'time_sample_batch': 0.00508135199546814, 'time_algorithm_update': 0.042473416090011595, 'critic_loss': 15.440136752128602, 'conservative_loss': -0.009497100165113806, 'alpha': 0.0001730488630128093, 'actor_loss': -10.023870653629302, 'temp': 0.1925741145014763, 'temp_loss': -0.0013496965632075445, 'time_step': 0.0478459107875824, 'td_error': 6.751841699965591, 'value_scale': 10.56499773907348, 'discounted_advantage': -8.481642603522115, 'initial_state': 13.90768814086914, 'diff_eval': 3395.1381690336234} step=96000
2025-12-06 12:45.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_96000.d3


Epoch 97/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.36it/s, critic_loss=15.4, conservative_loss=-0.00867, alpha=0.000158, actor_loss=-10.1, temp=0.192, temp_loss=0.00235]


2025-12-06 12:46.27 [info     ] CalQL_20251206112128: epoch=97 step=97000 epoch=97 metrics={'time_sample_batch': 0.005062329053878784, 'time_algorithm_update': 0.0430593318939209, 'critic_loss': 15.392390653133392, 'conservative_loss': -0.008665372168179602, 'alpha': 0.00015809226680721622, 'actor_loss': -10.10603317451477, 'temp': 0.19163163037598133, 'temp_loss': 0.002360678423079662, 'time_step': 0.04842312455177307, 'td_error': 7.138871328440579, 'value_scale': 10.648093652342386, 'discounted_advantage': -8.209972089317251, 'initial_state': 13.615612030029297, 'diff_eval': 3507.525281575232} step=97000
2025-12-06 12:46.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_97000.d3


Epoch 98/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.38it/s, critic_loss=15.8, conservative_loss=-0.00787, alpha=0.000144, actor_loss=-10.1, temp=0.191, temp_loss=0.000778]


2025-12-06 12:47.20 [info     ] CalQL_20251206112128: epoch=98 step=98000 epoch=98 metrics={'time_sample_batch': 0.005249431848526001, 'time_algorithm_update': 0.042794646978378294, 'critic_loss': 15.848855405330658, 'conservative_loss': -0.007872902544215321, 'alpha': 0.00014441419977811166, 'actor_loss': -10.152294627189637, 'temp': 0.19071859957277776, 'temp_loss': 0.0007556009951513261, 'time_step': 0.0483416588306427, 'td_error': 6.689727852309346, 'value_scale': 10.442042166621667, 'discounted_advantage': -8.363520230599178, 'initial_state': 14.887717247009277, 'diff_eval': 3455.4770354983207} step=98000
2025-12-06 12:47.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_98000.d3


Epoch 99/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.41it/s, critic_loss=15.8, conservative_loss=-0.00717, alpha=0.000132, actor_loss=-10.1, temp=0.192, temp_loss=-0.00205]


2025-12-06 12:48.12 [info     ] CalQL_20251206112128: epoch=99 step=99000 epoch=99 metrics={'time_sample_batch': 0.005210586547851562, 'time_algorithm_update': 0.04276713466644287, 'critic_loss': 15.777432703971863, 'conservative_loss': -0.0071636259751394395, 'alpha': 0.00013198910193750634, 'actor_loss': -10.115125359535217, 'temp': 0.19183635622262954, 'temp_loss': -0.0020279382152948527, 'time_step': 0.04827281284332276, 'td_error': 6.9535717533958055, 'value_scale': 10.664805008946276, 'discounted_advantage': -8.4966003573898, 'initial_state': 14.47060775756836, 'diff_eval': 3469.53488496598} step=99000
2025-12-06 12:48.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_99000.d3


Epoch 100/200: 100%|██████████| 1000/1000 [00:51<00:00, 19.53it/s, critic_loss=15.7, conservative_loss=-0.00654, alpha=0.000121, actor_loss=-10.1, temp=0.193, temp_loss=-0.00143]


2025-12-06 12:49.07 [info     ] CalQL_20251206112128: epoch=100 step=100000 epoch=100 metrics={'time_sample_batch': 0.005032833576202393, 'time_algorithm_update': 0.0450564067363739, 'critic_loss': 15.754217569828034, 'conservative_loss': -0.006540671788621694, 'alpha': 0.0001206072074783151, 'actor_loss': -10.116842734336853, 'temp': 0.1932605656683445, 'temp_loss': -0.0012841100618243217, 'time_step': 0.05038723373413086, 'td_error': 7.7760950126261, 'value_scale': 10.776970191000768, 'discounted_advantage': -8.737765874350695, 'initial_state': 13.13906478881836, 'diff_eval': 3663.8004224175716} step=100000
2025-12-06 12:49.07 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_100000.d3


Epoch 101/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.76it/s, critic_loss=15.8, conservative_loss=-0.00595, alpha=0.00011, actor_loss=-9.98, temp=0.192, temp_loss=0.00235]


2025-12-06 12:49.59 [info     ] CalQL_20251206112128: epoch=101 step=101000 epoch=101 metrics={'time_sample_batch': 0.005013571739196777, 'time_algorithm_update': 0.042142773389816285, 'critic_loss': 15.815021183013917, 'conservative_loss': -0.005944116137456149, 'alpha': 0.0001102180751186097, 'actor_loss': -9.981215173721314, 'temp': 0.19171571835875512, 'temp_loss': 0.0024541009831009434, 'time_step': 0.04744851517677307, 'td_error': 7.3816580211738305, 'value_scale': 10.15310891737657, 'discounted_advantage': -7.854462343569896, 'initial_state': 14.174304008483887, 'diff_eval': 3852.7168164366008} step=101000
2025-12-06 12:49.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_101000.d3


Epoch 102/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.73it/s, critic_loss=15.9, conservative_loss=-0.00544, alpha=0.000101, actor_loss=-9.83, temp=0.189, temp_loss=0.00216]


2025-12-06 12:50.50 [info     ] CalQL_20251206112128: epoch=102 step=102000 epoch=102 metrics={'time_sample_batch': 0.005001705408096313, 'time_algorithm_update': 0.04225845432281494, 'critic_loss': 15.92775393486023, 'conservative_loss': -0.005434864394832402, 'alpha': 0.00010070652180002071, 'actor_loss': -9.826769769668578, 'temp': 0.18948766626417637, 'temp_loss': 0.0021305503678740933, 'time_step': 0.04755277037620544, 'td_error': 7.47044446908347, 'value_scale': 10.269665713961434, 'discounted_advantage': -7.500843344982097, 'initial_state': 13.613398551940918, 'diff_eval': 3600.4810309688455} step=102000
2025-12-06 12:50.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_102000.d3


Epoch 103/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.64it/s, critic_loss=15.6, conservative_loss=-0.00501, alpha=9.2e-5, actor_loss=-9.67, temp=0.188, temp_loss=0.000375] 


2025-12-06 12:51.42 [info     ] CalQL_20251206112128: epoch=103 step=103000 epoch=103 metrics={'time_sample_batch': 0.005052673816680908, 'time_algorithm_update': 0.042384284257888795, 'critic_loss': 15.54104465007782, 'conservative_loss': -0.005008095987606794, 'alpha': 9.196599183633225e-05, 'actor_loss': -9.667167147636414, 'temp': 0.18795972019433976, 'temp_loss': 0.0003313381030457094, 'time_step': 0.0477265100479126, 'td_error': 7.111785378073846, 'value_scale': 10.15076188986445, 'discounted_advantage': -7.445156220499676, 'initial_state': 12.921838760375977, 'diff_eval': 3465.280050156116} step=103000
2025-12-06 12:51.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_103000.d3


Epoch 104/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.69it/s, critic_loss=15.2, conservative_loss=-0.00458, alpha=8.4e-5, actor_loss=-9.39, temp=0.187, temp_loss=0.000299] 


2025-12-06 12:52.34 [info     ] CalQL_20251206112128: epoch=104 step=104000 epoch=104 metrics={'time_sample_batch': 0.005042887687683105, 'time_algorithm_update': 0.042325527429580685, 'critic_loss': 15.139080498218537, 'conservative_loss': -0.004575049199163914, 'alpha': 8.398095612210455e-05, 'actor_loss': -9.383534029483796, 'temp': 0.1872860370129347, 'temp_loss': 0.00024508030887227504, 'time_step': 0.04766598677635193, 'td_error': 6.555097458739147, 'value_scale': 9.770610791367963, 'discounted_advantage': -7.180447202105602, 'initial_state': 13.530359268188477, 'diff_eval': 3593.763755905077} step=104000
2025-12-06 12:52.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_104000.d3


Epoch 105/200: 100%|██████████| 1000/1000 [00:47<00:00, 20.86it/s, critic_loss=14.9, conservative_loss=-0.00419, alpha=7.67e-5, actor_loss=-9.14, temp=0.187, temp_loss=0.000159]


2025-12-06 12:53.26 [info     ] CalQL_20251206112128: epoch=105 step=105000 epoch=105 metrics={'time_sample_batch': 0.004977025270462036, 'time_algorithm_update': 0.04197156882286072, 'critic_loss': 14.880604693889618, 'conservative_loss': -0.00418520869850181, 'alpha': 7.669978646299569e-05, 'actor_loss': -9.134402389526366, 'temp': 0.18740351858735085, 'temp_loss': 0.00016489485802594571, 'time_step': 0.047238590717315676, 'td_error': 7.071876164691609, 'value_scale': 10.103207896313297, 'discounted_advantage': -7.821723380797963, 'initial_state': 13.369895935058594, 'diff_eval': 3597.2507936099587} step=105000
2025-12-06 12:53.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_105000.d3


Epoch 106/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.58it/s, critic_loss=14.4, conservative_loss=-0.00383, alpha=7.01e-5, actor_loss=-8.85, temp=0.187, temp_loss=0.00109] 


2025-12-06 12:54.18 [info     ] CalQL_20251206112128: epoch=106 step=106000 epoch=106 metrics={'time_sample_batch': 0.005063987970352173, 'time_algorithm_update': 0.04253814697265625, 'critic_loss': 14.35976156616211, 'conservative_loss': -0.0038309364723972976, 'alpha': 7.005132726771989e-05, 'actor_loss': -8.8521970911026, 'temp': 0.18700772315263747, 'temp_loss': 0.000946894497377798, 'time_step': 0.04789581203460693, 'td_error': 6.336949630522432, 'value_scale': 9.649736454758765, 'discounted_advantage': -7.056219160166709, 'initial_state': 13.414508819580078, 'diff_eval': 3592.8433081238745} step=106000
2025-12-06 12:54.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_106000.d3


Epoch 107/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.33it/s, critic_loss=13.7, conservative_loss=-0.00352, alpha=6.4e-5, actor_loss=-8.6, temp=0.187, temp_loss=-0.000546] 


2025-12-06 12:55.10 [info     ] CalQL_20251206112128: epoch=107 step=107000 epoch=107 metrics={'time_sample_batch': 0.005140062093734741, 'time_algorithm_update': 0.04302310919761658, 'critic_loss': 13.747626902103423, 'conservative_loss': -0.0035230592363514005, 'alpha': 6.396610778028844e-05, 'actor_loss': -8.602054349899293, 'temp': 0.18677112472057342, 'temp_loss': -0.0006242488229181618, 'time_step': 0.048462913036346436, 'td_error': 6.288291312790736, 'value_scale': 10.200295779337266, 'discounted_advantage': -7.978218760810327, 'initial_state': 14.146716117858887, 'diff_eval': 3503.52796993283} step=107000
2025-12-06 12:55.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_107000.d3


Epoch 108/200: 100%|██████████| 1000/1000 [00:47<00:00, 21.09it/s, critic_loss=13.6, conservative_loss=-0.00324, alpha=5.84e-5, actor_loss=-8.4, temp=0.188, temp_loss=-0.0011]  


2025-12-06 12:56.02 [info     ] CalQL_20251206112128: epoch=108 step=108000 epoch=108 metrics={'time_sample_batch': 0.004854241847991944, 'time_algorithm_update': 0.04161077618598938, 'critic_loss': 13.632255214214325, 'conservative_loss': -0.0032365775385405867, 'alpha': 5.8397972792590734e-05, 'actor_loss': -8.399104498386382, 'temp': 0.18762090037763118, 'temp_loss': -0.0008908206972992048, 'time_step': 0.04675019145011902, 'td_error': 6.463390189061292, 'value_scale': 9.351497972597995, 'discounted_advantage': -6.65912070049351, 'initial_state': 12.336189270019531, 'diff_eval': 3679.957346212673} step=108000
2025-12-06 12:56.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_108000.d3


Epoch 109/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.47it/s, critic_loss=13.1, conservative_loss=-0.00296, alpha=5.33e-5, actor_loss=-8.21, temp=0.186, temp_loss=0.00199]


2025-12-06 12:56.55 [info     ] CalQL_20251206112128: epoch=109 step=109000 epoch=109 metrics={'time_sample_batch': 0.00482046890258789, 'time_algorithm_update': 0.042961848974227904, 'critic_loss': 13.09476569223404, 'conservative_loss': -0.0029546921548899265, 'alpha': 5.332161537808133e-05, 'actor_loss': -8.211068017005921, 'temp': 0.18646839924156666, 'temp_loss': 0.0020459602682385593, 'time_step': 0.04808501434326172, 'td_error': 6.534628898775609, 'value_scale': 9.758241833846514, 'discounted_advantage': -6.886217891788641, 'initial_state': 12.179878234863281, 'diff_eval': 3682.8191620669027} step=109000
2025-12-06 12:56.55 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_109000.d3


Epoch 110/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.68it/s, critic_loss=12.9, conservative_loss=-0.00273, alpha=4.87e-5, actor_loss=-8.11, temp=0.187, temp_loss=-0.000401]


2025-12-06 12:57.46 [info     ] CalQL_20251206112128: epoch=110 step=110000 epoch=110 metrics={'time_sample_batch': 0.004967052459716797, 'time_algorithm_update': 0.042341443300247195, 'critic_loss': 12.839367309093475, 'conservative_loss': -0.002729107946623117, 'alpha': 4.867788691626629e-05, 'actor_loss': -8.113983139038085, 'temp': 0.1867165121436119, 'temp_loss': -0.0004832580587826669, 'time_step': 0.0476102569103241, 'td_error': 6.031131690885678, 'value_scale': 9.912487507572559, 'discounted_advantage': -7.488823125775689, 'initial_state': 13.934657096862793, 'diff_eval': 3320.396108423194} step=110000
2025-12-06 12:57.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_110000.d3


Epoch 111/200: 100%|██████████| 1000/1000 [00:47<00:00, 20.84it/s, critic_loss=12.6, conservative_loss=-0.00248, alpha=4.45e-5, actor_loss=-8.01, temp=0.185, temp_loss=0.00259] 


2025-12-06 12:58.38 [info     ] CalQL_20251206112128: epoch=111 step=111000 epoch=111 metrics={'time_sample_batch': 0.005358484983444214, 'time_algorithm_update': 0.04160235357284546, 'critic_loss': 12.63533736371994, 'conservative_loss': -0.00247804328519851, 'alpha': 4.445615661825286e-05, 'actor_loss': -8.014147292137146, 'temp': 0.1854336988925934, 'temp_loss': 0.0025877196757355704, 'time_step': 0.047268622636795044, 'td_error': 6.2110330837907775, 'value_scale': 9.63782304224979, 'discounted_advantage': -7.7066096127993005, 'initial_state': 13.297289848327637, 'diff_eval': 3574.882997871488} step=111000
2025-12-06 12:58.38 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_111000.d3


Epoch 112/200: 100%|██████████| 1000/1000 [00:47<00:00, 20.87it/s, critic_loss=12.4, conservative_loss=-0.00227, alpha=4.06e-5, actor_loss=-8.03, temp=0.183, temp_loss=0.00128] 


2025-12-06 12:59.29 [info     ] CalQL_20251206112128: epoch=112 step=112000 epoch=112 metrics={'time_sample_batch': 0.0048758208751678465, 'time_algorithm_update': 0.04197481393814087, 'critic_loss': 12.365060368537902, 'conservative_loss': -0.0022725594283547253, 'alpha': 4.060392106839572e-05, 'actor_loss': -8.02654995727539, 'temp': 0.18265029992163181, 'temp_loss': 0.0012701159096322953, 'time_step': 0.04715603423118591, 'td_error': 5.990583218296091, 'value_scale': 9.832932424950917, 'discounted_advantage': -7.684341035876611, 'initial_state': 13.418068885803223, 'diff_eval': 3299.591780313493} step=112000
2025-12-06 12:59.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_112000.d3


Epoch 113/200: 100%|██████████| 1000/1000 [00:47<00:00, 20.91it/s, critic_loss=12.2, conservative_loss=-0.00207, alpha=3.71e-5, actor_loss=-8.02, temp=0.181, temp_loss=0.00267]


2025-12-06 13:00.21 [info     ] CalQL_20251206112128: epoch=113 step=113000 epoch=113 metrics={'time_sample_batch': 0.004943920612335205, 'time_algorithm_update': 0.041906172037124635, 'critic_loss': 12.171313292980194, 'conservative_loss': -0.0020725242181215435, 'alpha': 3.708470447600121e-05, 'actor_loss': -8.01799756336212, 'temp': 0.18060442039370536, 'temp_loss': 0.002587825652444735, 'time_step': 0.047140640258789064, 'td_error': 6.586500679086933, 'value_scale': 9.503936731123833, 'discounted_advantage': -7.765605199775901, 'initial_state': 11.755136489868164, 'diff_eval': 3240.114889116546} step=113000
2025-12-06 13:00.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_113000.d3


Epoch 114/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.80it/s, critic_loss=12.2, conservative_loss=-0.00189, alpha=3.39e-5, actor_loss=-8.02, temp=0.177, temp_loss=0.00325]


2025-12-06 13:01.12 [info     ] CalQL_20251206112128: epoch=114 step=114000 epoch=114 metrics={'time_sample_batch': 0.005011412382125854, 'time_algorithm_update': 0.04205503058433533, 'critic_loss': 12.180851289749146, 'conservative_loss': -0.0018933553997194394, 'alpha': 3.3875387853186115e-05, 'actor_loss': -8.012911241531372, 'temp': 0.17745497351884842, 'temp_loss': 0.0032159785146359354, 'time_step': 0.04736816143989563, 'td_error': 6.202047450077243, 'value_scale': 10.14037730242067, 'discounted_advantage': -7.8818775106387955, 'initial_state': 13.652264595031738, 'diff_eval': 3301.874442988336} step=114000
2025-12-06 13:01.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_114000.d3


Epoch 115/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.29it/s, critic_loss=12.2, conservative_loss=-0.00173, alpha=3.1e-5, actor_loss=-8.1, temp=0.174, temp_loss=0.00166] 


2025-12-06 13:02.05 [info     ] CalQL_20251206112128: epoch=115 step=115000 epoch=115 metrics={'time_sample_batch': 0.0050553114414215086, 'time_algorithm_update': 0.04319049096107483, 'critic_loss': 12.186542483091355, 'conservative_loss': -0.0017328044464811683, 'alpha': 3.0942988600145326e-05, 'actor_loss': -8.09874205684662, 'temp': 0.1740920240432024, 'temp_loss': 0.0017763884317828342, 'time_step': 0.04854317164421081, 'td_error': 6.09481410599993, 'value_scale': 10.167793171953376, 'discounted_advantage': -8.539772854103377, 'initial_state': 13.42291259765625, 'diff_eval': 3321.851506545591} step=115000
2025-12-06 13:02.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_115000.d3


Epoch 116/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.61it/s, critic_loss=11.9, conservative_loss=-0.00159, alpha=2.83e-5, actor_loss=-8.11, temp=0.173, temp_loss=0.00165]


2025-12-06 13:02.57 [info     ] CalQL_20251206112128: epoch=116 step=116000 epoch=116 metrics={'time_sample_batch': 0.005069895744323731, 'time_algorithm_update': 0.042473328590393065, 'critic_loss': 11.95128382563591, 'conservative_loss': -0.001588945658528246, 'alpha': 2.8258080632440398e-05, 'actor_loss': -8.115137046813965, 'temp': 0.17277109387516976, 'temp_loss': 0.0016883469722815789, 'time_step': 0.047825085163116454, 'td_error': 6.827903066633047, 'value_scale': 10.390981131421412, 'discounted_advantage': -7.9362666373987265, 'initial_state': 12.9817533493042, 'diff_eval': 3491.4285995001223} step=116000
2025-12-06 13:02.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_116000.d3


Epoch 117/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.56it/s, critic_loss=12.3, conservative_loss=-0.00145, alpha=2.58e-5, actor_loss=-8.12, temp=0.171, temp_loss=0.00116] 


2025-12-06 13:03.49 [info     ] CalQL_20251206112128: epoch=117 step=117000 epoch=117 metrics={'time_sample_batch': 0.0050552721023559575, 'time_algorithm_update': 0.04251505327224731, 'critic_loss': 12.291466567039489, 'conservative_loss': -0.001446695860591717, 'alpha': 2.5811255083681317e-05, 'actor_loss': -8.123066319465638, 'temp': 0.1712328214943409, 'temp_loss': 0.0012358484592987224, 'time_step': 0.04789692234992981, 'td_error': 5.983457524044959, 'value_scale': 10.655405944695717, 'discounted_advantage': -7.6714264497455105, 'initial_state': 15.3956937789917, 'diff_eval': 3342.1429603263787} step=117000
2025-12-06 13:03.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_117000.d3


Epoch 118/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.59it/s, critic_loss=12, conservative_loss=-0.00133, alpha=2.36e-5, actor_loss=-8.12, temp=0.171, temp_loss=-0.00116]  


2025-12-06 13:04.42 [info     ] CalQL_20251206112128: epoch=118 step=118000 epoch=118 metrics={'time_sample_batch': 0.005044062614440918, 'time_algorithm_update': 0.042522849559783936, 'critic_loss': 11.955511447429657, 'conservative_loss': -0.001327205145265907, 'alpha': 2.3574022534376127e-05, 'actor_loss': -8.115748982429505, 'temp': 0.17081446409225465, 'temp_loss': -0.0012198616248788312, 'time_step': 0.04785437560081482, 'td_error': 6.333297310025866, 'value_scale': 10.888225217014146, 'discounted_advantage': -7.794322664665438, 'initial_state': 14.544836044311523, 'diff_eval': 3389.1943166850087} step=118000
2025-12-06 13:04.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_118000.d3


Epoch 119/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.68it/s, critic_loss=11.8, conservative_loss=-0.00121, alpha=2.15e-5, actor_loss=-8.09, temp=0.172, temp_loss=-0.000451]


2025-12-06 13:05.33 [info     ] CalQL_20251206112128: epoch=119 step=119000 epoch=119 metrics={'time_sample_batch': 0.005016973257064819, 'time_algorithm_update': 0.042328759670257565, 'critic_loss': 11.795914286136627, 'conservative_loss': -0.0012052230854751541, 'alpha': 2.15357970646437e-05, 'actor_loss': -8.09763444185257, 'temp': 0.17209370112419128, 'temp_loss': -0.00045344958209898325, 'time_step': 0.04763175988197327, 'td_error': 6.19935602667033, 'value_scale': 10.562811243501613, 'discounted_advantage': -7.653045596957623, 'initial_state': 14.493966102600098, 'diff_eval': 3568.624736015403} step=119000
2025-12-06 13:05.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_119000.d3


Epoch 120/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.33it/s, critic_loss=12, conservative_loss=-0.0011, alpha=1.97e-5, actor_loss=-8.03, temp=0.172, temp_loss=-0.000924]  


2025-12-06 13:06.26 [info     ] CalQL_20251206112128: epoch=120 step=120000 epoch=120 metrics={'time_sample_batch': 0.005113916873931885, 'time_algorithm_update': 0.04303042840957642, 'critic_loss': 12.004130353927613, 'conservative_loss': -0.0011045001022866928, 'alpha': 1.9674855489938636e-05, 'actor_loss': -8.02721535205841, 'temp': 0.1719489236176014, 'temp_loss': -0.000833738480694592, 'time_step': 0.048445102691650394, 'td_error': 7.064868325116086, 'value_scale': 10.543144622257124, 'discounted_advantage': -8.165934101258857, 'initial_state': 12.379058837890625, 'diff_eval': 3554.9198322456964} step=120000
2025-12-06 13:06.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_120000.d3


Epoch 121/200: 100%|██████████| 1000/1000 [00:50<00:00, 19.95it/s, critic_loss=12, conservative_loss=-0.00101, alpha=1.8e-5, actor_loss=-8.08, temp=0.172, temp_loss=0.00175]   


2025-12-06 13:07.20 [info     ] CalQL_20251206112128: epoch=121 step=121000 epoch=121 metrics={'time_sample_batch': 0.0051369740962982175, 'time_algorithm_update': 0.04393287706375122, 'critic_loss': 12.014499137878419, 'conservative_loss': -0.0010090962006361224, 'alpha': 1.7969315549635212e-05, 'actor_loss': -8.078181586742401, 'temp': 0.17213237646222115, 'temp_loss': 0.001844168223789893, 'time_step': 0.04936639451980591, 'td_error': 5.990971047530985, 'value_scale': 10.686871031684877, 'discounted_advantage': -8.255933854525367, 'initial_state': 15.133872032165527, 'diff_eval': 3882.941216523574} step=121000
2025-12-06 13:07.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_121000.d3


Epoch 122/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.56it/s, critic_loss=11.9, conservative_loss=-0.000917, alpha=1.64e-5, actor_loss=-8.1, temp=0.169, temp_loss=0.00279]


2025-12-06 13:08.12 [info     ] CalQL_20251206112128: epoch=122 step=122000 epoch=122 metrics={'time_sample_batch': 0.004977621793746948, 'time_algorithm_update': 0.042683890104293826, 'critic_loss': 11.8801270737648, 'conservative_loss': -0.00091668026993284, 'alpha': 1.6417263368566636e-05, 'actor_loss': -8.10276772069931, 'temp': 0.1690152686238289, 'temp_loss': 0.0027639864166558253, 'time_step': 0.047965606451034544, 'td_error': 6.631533805557149, 'value_scale': 10.336338381820857, 'discounted_advantage': -7.802370655397455, 'initial_state': 12.386707305908203, 'diff_eval': 3799.5302480530377} step=122000
2025-12-06 13:08.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_122000.d3


Epoch 123/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.62it/s, critic_loss=11.9, conservative_loss=-0.000839, alpha=1.5e-5, actor_loss=-8.11, temp=0.166, temp_loss=0.00272]


2025-12-06 13:09.04 [info     ] CalQL_20251206112128: epoch=123 step=123000 epoch=123 metrics={'time_sample_batch': 0.005065761804580689, 'time_algorithm_update': 0.04244729018211365, 'critic_loss': 11.901831180095673, 'conservative_loss': -0.0008382980262394994, 'alpha': 1.5000227163909586e-05, 'actor_loss': -8.102187534809113, 'temp': 0.1658946043252945, 'temp_loss': 0.0028968671052134595, 'time_step': 0.047796674489974975, 'td_error': 6.869301688172409, 'value_scale': 10.243701556222849, 'discounted_advantage': -8.34859671303201, 'initial_state': 12.029279708862305, 'diff_eval': 3478.3224036907645} step=123000
2025-12-06 13:09.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_123000.d3


Epoch 124/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.57it/s, critic_loss=12, conservative_loss=-0.000766, alpha=1.37e-5, actor_loss=-8.16, temp=0.165, temp_loss=0.000407]  


2025-12-06 13:09.56 [info     ] CalQL_20251206112128: epoch=124 step=124000 epoch=124 metrics={'time_sample_batch': 0.0050725257396698, 'time_algorithm_update': 0.04251209592819214, 'critic_loss': 12.00207045698166, 'conservative_loss': -0.0007653192075085827, 'alpha': 1.37015795780826e-05, 'actor_loss': -8.159323225021362, 'temp': 0.16516403436660768, 'temp_loss': 0.0004066060043405741, 'time_step': 0.04789570522308349, 'td_error': 6.328201661855332, 'value_scale': 10.578257630912514, 'discounted_advantage': -7.802505828945597, 'initial_state': 13.762799263000488, 'diff_eval': 3648.6267112478035} step=124000
2025-12-06 13:09.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_124000.d3


Epoch 125/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.41it/s, critic_loss=12, conservative_loss=-0.000699, alpha=1.25e-5, actor_loss=-8.15, temp=0.164, temp_loss=0.000702] 


2025-12-06 13:10.49 [info     ] CalQL_20251206112128: epoch=125 step=125000 epoch=125 metrics={'time_sample_batch': 0.005080650568008423, 'time_algorithm_update': 0.042890309810638426, 'critic_loss': 12.038809827804565, 'conservative_loss': -0.0006991099651786498, 'alpha': 1.2517473214757046e-05, 'actor_loss': -8.152447339057922, 'temp': 0.16395018890500068, 'temp_loss': 0.0005762577066197992, 'time_step': 0.04826357531547546, 'td_error': 6.4621740061879995, 'value_scale': 10.351997483585714, 'discounted_advantage': -8.126260753463407, 'initial_state': 13.978625297546387, 'diff_eval': 3440.75689106375} step=125000
2025-12-06 13:10.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_125000.d3


Epoch 126/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.68it/s, critic_loss=11.8, conservative_loss=-0.00064, alpha=1.14e-5, actor_loss=-8.16, temp=0.164, temp_loss=-0.000528]


2025-12-06 13:11.41 [info     ] CalQL_20251206112128: epoch=126 step=126000 epoch=126 metrics={'time_sample_batch': 0.005027802467346192, 'time_algorithm_update': 0.04234438157081604, 'critic_loss': 11.774432742834092, 'conservative_loss': -0.0006397795597440563, 'alpha': 1.1433667819801485e-05, 'actor_loss': -8.161198912620545, 'temp': 0.16406317235529422, 'temp_loss': -0.0006196249770582654, 'time_step': 0.04765750789642334, 'td_error': 6.737342396774249, 'value_scale': 10.27057160970139, 'discounted_advantage': -8.13353372994843, 'initial_state': 12.832236289978027, 'diff_eval': 3355.9775944008366} step=126000
2025-12-06 13:11.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_126000.d3


Epoch 127/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.61it/s, critic_loss=11.9, conservative_loss=-0.000582, alpha=1.04e-5, actor_loss=-8.17, temp=0.165, temp_loss=-0.000778]


2025-12-06 13:12.33 [info     ] CalQL_20251206112128: epoch=127 step=127000 epoch=127 metrics={'time_sample_batch': 0.005054275751113892, 'time_algorithm_update': 0.042488338470458985, 'critic_loss': 11.904516607284545, 'conservative_loss': -0.0005815323835122399, 'alpha': 1.0445291881296726e-05, 'actor_loss': -8.171711284637452, 'temp': 0.16473850478231908, 'temp_loss': -0.0007149088349542581, 'time_step': 0.04781966853141785, 'td_error': 6.494541189580642, 'value_scale': 10.550664699547404, 'discounted_advantage': -7.9604896318291605, 'initial_state': 13.992402076721191, 'diff_eval': 3804.4326692531145} step=127000
2025-12-06 13:12.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_127000.d3


Epoch 128/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.50it/s, critic_loss=12.3, conservative_loss=-0.000532, alpha=9.55e-6, actor_loss=-8.19, temp=0.166, temp_loss=0.000463]


2025-12-06 13:13.25 [info     ] CalQL_20251206112128: epoch=128 step=128000 epoch=128 metrics={'time_sample_batch': 0.0050178756713867186, 'time_algorithm_update': 0.042730449199676515, 'critic_loss': 12.248025916099548, 'conservative_loss': -0.0005316389450163115, 'alpha': 9.542196814436465e-06, 'actor_loss': -8.191691905498505, 'temp': 0.16569515331089496, 'temp_loss': 0.0004741720054880716, 'time_step': 0.048045598030090335, 'td_error': 6.434028153567878, 'value_scale': 10.431839686461661, 'discounted_advantage': -8.241202122462907, 'initial_state': 12.420654296875, 'diff_eval': 3625.9819817569282} step=128000
2025-12-06 13:13.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_128000.d3


Epoch 129/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.49it/s, critic_loss=11.9, conservative_loss=-0.000486, alpha=8.72e-6, actor_loss=-8.2, temp=0.164, temp_loss=0.000998]


2025-12-06 13:14.17 [info     ] CalQL_20251206112128: epoch=129 step=129000 epoch=129 metrics={'time_sample_batch': 0.005094210386276245, 'time_algorithm_update': 0.04270683217048645, 'critic_loss': 11.8547366065979, 'conservative_loss': -0.00048618145869113507, 'alpha': 8.717081971553853e-06, 'actor_loss': -8.200079935073852, 'temp': 0.16356201845407486, 'temp_loss': 0.0011368210668442772, 'time_step': 0.04808688735961914, 'td_error': 6.4859839587747645, 'value_scale': 10.322588178240956, 'discounted_advantage': -8.476491028063386, 'initial_state': 13.888354301452637, 'diff_eval': 4004.7745464971636} step=129000
2025-12-06 13:14.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_129000.d3


Epoch 130/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.31it/s, critic_loss=12, conservative_loss=-0.000443, alpha=7.97e-6, actor_loss=-8.27, temp=0.162, temp_loss=0.0017]  


2025-12-06 13:15.11 [info     ] CalQL_20251206112128: epoch=130 step=130000 epoch=130 metrics={'time_sample_batch': 0.005161253213882447, 'time_algorithm_update': 0.043046756982803344, 'critic_loss': 12.020084198236466, 'conservative_loss': -0.00044274538752506486, 'alpha': 7.96375173740671e-06, 'actor_loss': -8.272270894527436, 'temp': 0.162441934466362, 'temp_loss': 0.0017396983469370752, 'time_step': 0.04850138115882874, 'td_error': 6.148977830877653, 'value_scale': 10.26299002666373, 'discounted_advantage': -8.246194208255188, 'initial_state': 14.4171142578125, 'diff_eval': 3572.3416912888097} step=130000
2025-12-06 13:15.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_130000.d3


Epoch 131/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.44it/s, critic_loss=12, conservative_loss=-0.000405, alpha=7.28e-6, actor_loss=-8.3, temp=0.161, temp_loss=0.00122]    


2025-12-06 13:16.03 [info     ] CalQL_20251206112128: epoch=131 step=131000 epoch=131 metrics={'time_sample_batch': 0.005076147079467773, 'time_algorithm_update': 0.04280585408210755, 'critic_loss': 11.999623052597046, 'conservative_loss': -0.0004045083465462085, 'alpha': 7.274559619872889e-06, 'actor_loss': -8.30076237821579, 'temp': 0.16132526199519634, 'temp_loss': 0.0010714565934613347, 'time_step': 0.04817903161048889, 'td_error': 6.51568355731094, 'value_scale': 10.606955293306811, 'discounted_advantage': -7.386016317104911, 'initial_state': 14.273165702819824, 'diff_eval': 3454.1188679316206} step=131000
2025-12-06 13:16.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_131000.d3


Epoch 132/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.80it/s, critic_loss=12, conservative_loss=-0.000369, alpha=6.65e-6, actor_loss=-8.35, temp=0.16, temp_loss=0.000625]  


2025-12-06 13:16.56 [info     ] CalQL_20251206112128: epoch=132 step=132000 epoch=132 metrics={'time_sample_batch': 0.005038652420043945, 'time_algorithm_update': 0.04205461311340332, 'critic_loss': 12.013799397468567, 'conservative_loss': -0.00036916994518833236, 'alpha': 6.6459827203289024e-06, 'actor_loss': -8.355147772312165, 'temp': 0.15970063999295234, 'temp_loss': 0.0006493975942139514, 'time_step': 0.04738822937011719, 'td_error': 6.413276019717356, 'value_scale': 10.360951849873812, 'discounted_advantage': -8.25485135972652, 'initial_state': 14.293883323669434, 'diff_eval': 4032.3772356112354} step=132000
2025-12-06 13:16.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_132000.d3


Epoch 133/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.50it/s, critic_loss=12, conservative_loss=-0.000336, alpha=6.07e-6, actor_loss=-8.36, temp=0.158, temp_loss=0.00192] 


2025-12-06 13:17.48 [info     ] CalQL_20251206112128: epoch=133 step=133000 epoch=133 metrics={'time_sample_batch': 0.005030058860778809, 'time_algorithm_update': 0.042736475706100466, 'critic_loss': 12.01458397436142, 'conservative_loss': -0.00033534923632396385, 'alpha': 6.072531132303993e-06, 'actor_loss': -8.362657914161682, 'temp': 0.15843209983408452, 'temp_loss': 0.002007104903459549, 'time_step': 0.048062885761260984, 'td_error': 6.16841931973097, 'value_scale': 10.258703294393591, 'discounted_advantage': -8.49885329426476, 'initial_state': 14.472477912902832, 'diff_eval': 3703.681528018138} step=133000
2025-12-06 13:17.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_133000.d3


Epoch 134/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.53it/s, critic_loss=12.1, conservative_loss=-0.000307, alpha=5.55e-6, actor_loss=-8.48, temp=0.158, temp_loss=-0.00126]


2025-12-06 13:18.40 [info     ] CalQL_20251206112128: epoch=134 step=134000 epoch=134 metrics={'time_sample_batch': 0.0050424327850341795, 'time_algorithm_update': 0.04266176509857178, 'critic_loss': 12.124410666942596, 'conservative_loss': -0.00030685341308708304, 'alpha': 5.5478041999776905e-06, 'actor_loss': -8.481828283786774, 'temp': 0.15826302868127823, 'temp_loss': -0.0013036452773958446, 'time_step': 0.04800037717819214, 'td_error': 6.401456603705992, 'value_scale': 10.299867532870202, 'discounted_advantage': -8.148048233870012, 'initial_state': 13.075299263000488, 'diff_eval': 3594.415874435234} step=134000
2025-12-06 13:18.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_134000.d3


Epoch 135/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.16it/s, critic_loss=12.2, conservative_loss=-0.000279, alpha=5.07e-6, actor_loss=-8.59, temp=0.159, temp_loss=0.00108] 


2025-12-06 13:19.33 [info     ] CalQL_20251206112128: epoch=135 step=135000 epoch=135 metrics={'time_sample_batch': 0.0052075042724609374, 'time_algorithm_update': 0.043344173192977904, 'critic_loss': 12.229068700790405, 'conservative_loss': -0.00027925075580424166, 'alpha': 5.068666090664919e-06, 'actor_loss': -8.595079736709595, 'temp': 0.15865847189724444, 'temp_loss': 0.0010017022349638864, 'time_step': 0.04885062432289124, 'td_error': 5.904261818541724, 'value_scale': 10.418788704946591, 'discounted_advantage': -8.079649416240995, 'initial_state': 15.314604759216309, 'diff_eval': 3660.8810837909014} step=135000
2025-12-06 13:19.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_135000.d3


Epoch 136/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.49it/s, critic_loss=12.3, conservative_loss=-0.000255, alpha=4.63e-6, actor_loss=-8.72, temp=0.157, temp_loss=0.00218]


2025-12-06 13:20.26 [info     ] CalQL_20251206112128: epoch=136 step=136000 epoch=136 metrics={'time_sample_batch': 0.005060449838638305, 'time_algorithm_update': 0.04276108694076538, 'critic_loss': 12.32069073152542, 'conservative_loss': -0.00025506316029350274, 'alpha': 4.631284156857873e-06, 'actor_loss': -8.7197003865242, 'temp': 0.1567498802691698, 'temp_loss': 0.002131849665893242, 'time_step': 0.04811184787750244, 'td_error': 6.711750856144229, 'value_scale': 10.07135305006591, 'discounted_advantage': -7.5425616691315325, 'initial_state': 12.167410850524902, 'diff_eval': 3356.3967212000416} step=136000
2025-12-06 13:20.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_136000.d3


Epoch 137/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.23it/s, critic_loss=12.6, conservative_loss=-0.000231, alpha=4.23e-6, actor_loss=-8.78, temp=0.156, temp_loss=-0.00159]


2025-12-06 13:21.19 [info     ] CalQL_20251206112128: epoch=137 step=137000 epoch=137 metrics={'time_sample_batch': 0.005168028354644775, 'time_algorithm_update': 0.043206344604492186, 'critic_loss': 12.554967997074128, 'conservative_loss': -0.00023137604660587385, 'alpha': 4.231995783811726e-06, 'actor_loss': -8.780041743278504, 'temp': 0.15596046674251557, 'temp_loss': -0.0016177392391255126, 'time_step': 0.048664368629455564, 'td_error': 6.871870227652184, 'value_scale': 10.545139390945572, 'discounted_advantage': -8.339335693947499, 'initial_state': 12.673903465270996, 'diff_eval': 3671.173528831828} step=137000
2025-12-06 13:21.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_137000.d3


Epoch 138/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.62it/s, critic_loss=12.5, conservative_loss=-0.000211, alpha=3.87e-6, actor_loss=-8.92, temp=0.157, temp_loss=0.0004] 


2025-12-06 13:22.11 [info     ] CalQL_20251206112128: epoch=138 step=138000 epoch=138 metrics={'time_sample_batch': 0.005041722774505615, 'time_algorithm_update': 0.04244912433624268, 'critic_loss': 12.5229141330719, 'conservative_loss': -0.0002113128256460186, 'alpha': 3.867054509328227e-06, 'actor_loss': -8.917772565364837, 'temp': 0.15661682371795177, 'temp_loss': 0.0003090247003710829, 'time_step': 0.047766536474227905, 'td_error': 6.209098730469573, 'value_scale': 10.606894953074894, 'discounted_advantage': -8.382971541137545, 'initial_state': 14.448709487915039, 'diff_eval': 3457.63389901533} step=138000
2025-12-06 13:22.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_138000.d3


Epoch 139/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.66it/s, critic_loss=12.6, conservative_loss=-0.000193, alpha=3.53e-6, actor_loss=-8.96, temp=0.156, temp_loss=0.0009] 


2025-12-06 13:23.03 [info     ] CalQL_20251206112128: epoch=139 step=139000 epoch=139 metrics={'time_sample_batch': 0.004976383686065674, 'time_algorithm_update': 0.042417349815368655, 'critic_loss': 12.647351802825927, 'conservative_loss': -0.000192725517146755, 'alpha': 3.5330304742728912e-06, 'actor_loss': -8.956587926864625, 'temp': 0.15630330711603166, 'temp_loss': 0.0008258177764946594, 'time_step': 0.04769014430046081, 'td_error': 6.225306788294162, 'value_scale': 10.765997373673063, 'discounted_advantage': -8.60682414695844, 'initial_state': 14.100899696350098, 'diff_eval': 3367.919575165102} step=139000
2025-12-06 13:23.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_139000.d3


Epoch 140/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.40it/s, critic_loss=12.5, conservative_loss=-0.000177, alpha=3.23e-6, actor_loss=-9.02, temp=0.156, temp_loss=0.000546]


2025-12-06 13:23.55 [info     ] CalQL_20251206112128: epoch=140 step=140000 epoch=140 metrics={'time_sample_batch': 0.0051058967113494875, 'time_algorithm_update': 0.04287648701667786, 'critic_loss': 12.560069538593293, 'conservative_loss': -0.0001763658104609931, 'alpha': 3.227732181585452e-06, 'actor_loss': -9.020003481388091, 'temp': 0.15554179580509664, 'temp_loss': 0.0006111917647649534, 'time_step': 0.04828417754173279, 'td_error': 6.714218158082523, 'value_scale': 10.67034240523845, 'discounted_advantage': -8.844851039569757, 'initial_state': 13.606244087219238, 'diff_eval': 3683.0917870787666} step=140000
2025-12-06 13:23.55 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_140000.d3


Epoch 141/200: 100%|██████████| 1000/1000 [00:50<00:00, 19.94it/s, critic_loss=12.7, conservative_loss=-0.00016, alpha=2.95e-6, actor_loss=-9.08, temp=0.156, temp_loss=-0.00117] 


2025-12-06 13:24.49 [info     ] CalQL_20251206112128: epoch=141 step=141000 epoch=141 metrics={'time_sample_batch': 0.005741678714752198, 'time_algorithm_update': 0.04331990361213684, 'critic_loss': 12.748561247110366, 'conservative_loss': -0.0001603647283045575, 'alpha': 2.9487778606380744e-06, 'actor_loss': -9.078607029438018, 'temp': 0.15579725037515163, 'temp_loss': -0.0010392443499295041, 'time_step': 0.04938110280036926, 'td_error': 6.275397762816072, 'value_scale': 10.505185076525422, 'discounted_advantage': -9.101867478664133, 'initial_state': 14.886211395263672, 'diff_eval': 4123.462872651618} step=141000
2025-12-06 13:24.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_141000.d3


Epoch 142/200: 100%|██████████| 1000/1000 [00:47<00:00, 20.90it/s, critic_loss=12.7, conservative_loss=-0.000147, alpha=2.69e-6, actor_loss=-9.11, temp=0.158, temp_loss=-0.00315]


2025-12-06 13:25.40 [info     ] CalQL_20251206112128: epoch=142 step=142000 epoch=142 metrics={'time_sample_batch': 0.00490548849105835, 'time_algorithm_update': 0.0419327495098114, 'critic_loss': 12.72060494017601, 'conservative_loss': -0.00014718912327953148, 'alpha': 2.693721118021131e-06, 'actor_loss': -9.113105947494507, 'temp': 0.15848305237293242, 'temp_loss': -0.003093102739192545, 'time_step': 0.04713163781166076, 'td_error': 5.7335154334674545, 'value_scale': 10.573126565287168, 'discounted_advantage': -8.093420681034006, 'initial_state': 15.054631233215332, 'diff_eval': 3454.9821014817203} step=142000
2025-12-06 13:25.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_142000.d3


Epoch 143/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.81it/s, critic_loss=12.7, conservative_loss=-0.000134, alpha=2.46e-6, actor_loss=-9.09, temp=0.16, temp_loss=0.000534]


2025-12-06 13:26.32 [info     ] CalQL_20251206112128: epoch=143 step=143000 epoch=143 metrics={'time_sample_batch': 0.004997161865234375, 'time_algorithm_update': 0.04203099942207336, 'critic_loss': 12.697791808605194, 'conservative_loss': -0.00013426701112621232, 'alpha': 2.4605132939541363e-06, 'actor_loss': -9.091776064872741, 'temp': 0.16008492682874204, 'temp_loss': 0.0005102726833429188, 'time_step': 0.04733249235153198, 'td_error': 6.389469980029462, 'value_scale': 10.406359275097257, 'discounted_advantage': -7.973856393025124, 'initial_state': 13.452062606811523, 'diff_eval': 3998.0299640037683} step=143000
2025-12-06 13:26.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_143000.d3


Epoch 144/200: 100%|██████████| 1000/1000 [00:50<00:00, 19.91it/s, critic_loss=12.6, conservative_loss=-0.000122, alpha=2.25e-6, actor_loss=-9.1, temp=0.158, temp_loss=0.002]  


2025-12-06 13:27.26 [info     ] CalQL_20251206112128: epoch=144 step=144000 epoch=144 metrics={'time_sample_batch': 0.005326827526092529, 'time_algorithm_update': 0.04376626205444336, 'critic_loss': 12.578172593593598, 'conservative_loss': -0.00012227828937466258, 'alpha': 2.247820101274556e-06, 'actor_loss': -9.094041395664215, 'temp': 0.15804096958041192, 'temp_loss': 0.0019544211514876224, 'time_step': 0.04941070079803467, 'td_error': 6.152993409050961, 'value_scale': 10.763047551178847, 'discounted_advantage': -8.281097810375586, 'initial_state': 14.83532428741455, 'diff_eval': 3527.4817984822494} step=144000
2025-12-06 13:27.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_144000.d3


Epoch 145/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.19it/s, critic_loss=12.9, conservative_loss=-0.000112, alpha=2.05e-6, actor_loss=-9.12, temp=0.157, temp_loss=0.000359]


2025-12-06 13:28.19 [info     ] CalQL_20251206112128: epoch=145 step=145000 epoch=145 metrics={'time_sample_batch': 0.005331458330154419, 'time_algorithm_update': 0.04317169046401977, 'critic_loss': 12.85478602862358, 'conservative_loss': -0.00011170448191842297, 'alpha': 2.053675835441027e-06, 'actor_loss': -9.121637496471404, 'temp': 0.15726670932769776, 'temp_loss': 0.0003463336941204034, 'time_step': 0.04880839419364929, 'td_error': 6.203754070036451, 'value_scale': 10.57921958796068, 'discounted_advantage': -8.345503168811973, 'initial_state': 13.656123161315918, 'diff_eval': 3446.0706148287154} step=145000
2025-12-06 13:28.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_145000.d3


Epoch 146/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.42it/s, critic_loss=12.4, conservative_loss=-0.000102, alpha=1.88e-6, actor_loss=-9.07, temp=0.156, temp_loss=0.000822]


2025-12-06 13:29.11 [info     ] CalQL_20251206112128: epoch=146 step=146000 epoch=146 metrics={'time_sample_batch': 0.005019902467727661, 'time_algorithm_update': 0.04295666837692261, 'critic_loss': 12.380453595638276, 'conservative_loss': -0.00010225683800672414, 'alpha': 1.8761461908525234e-06, 'actor_loss': -9.072819165706635, 'temp': 0.15589910572767257, 'temp_loss': 0.0009263679253053851, 'time_step': 0.048260931253433226, 'td_error': 6.961738532606229, 'value_scale': 10.738741165300253, 'discounted_advantage': -7.927642817669467, 'initial_state': 12.116342544555664, 'diff_eval': 3467.456432159047} step=146000
2025-12-06 13:29.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_146000.d3


Epoch 147/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.24it/s, critic_loss=12.5, conservative_loss=-9.38e-5, alpha=1.71e-6, actor_loss=-9.06, temp=0.155, temp_loss=0.00039] 


2025-12-06 13:30.04 [info     ] CalQL_20251206112128: epoch=147 step=147000 epoch=147 metrics={'time_sample_batch': 0.00512351107597351, 'time_algorithm_update': 0.043250461339950565, 'critic_loss': 12.478633313179015, 'conservative_loss': -9.372041920869379e-05, 'alpha': 1.713319919872447e-06, 'actor_loss': -9.057484263420106, 'temp': 0.15542885874211787, 'temp_loss': 0.0003110539352637716, 'time_step': 0.048683017253875735, 'td_error': 5.935400048702083, 'value_scale': 11.128952692016922, 'discounted_advantage': -8.960050002434755, 'initial_state': 16.40376853942871, 'diff_eval': 3527.1476949682306} step=147000
2025-12-06 13:30.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_147000.d3


Epoch 148/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.37it/s, critic_loss=12.3, conservative_loss=-8.59e-5, alpha=1.57e-6, actor_loss=-9.11, temp=0.157, temp_loss=-0.00248]


2025-12-06 13:30.57 [info     ] CalQL_20251206112128: epoch=148 step=148000 epoch=148 metrics={'time_sample_batch': 0.005130258321762085, 'time_algorithm_update': 0.042913467168807985, 'critic_loss': 12.3078628718853, 'conservative_loss': -8.593063009175239e-05, 'alpha': 1.5646517706500162e-06, 'actor_loss': -9.112467105865479, 'temp': 0.1570782127082348, 'temp_loss': -0.0025138955988222734, 'time_step': 0.048344735383987425, 'td_error': 7.023454837883122, 'value_scale': 11.215634584531488, 'discounted_advantage': -9.53612376701396, 'initial_state': 12.795720100402832, 'diff_eval': 3134.3908916891596} step=148000
2025-12-06 13:30.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_148000.d3


Epoch 149/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.27it/s, critic_loss=12.4, conservative_loss=-7.85e-5, alpha=1.43e-6, actor_loss=-9.08, temp=0.16, temp_loss=-0.00226]


2025-12-06 13:31.50 [info     ] CalQL_20251206112128: epoch=149 step=149000 epoch=149 metrics={'time_sample_batch': 0.0051491641998291015, 'time_algorithm_update': 0.04308449387550354, 'critic_loss': 12.419322633266448, 'conservative_loss': -7.843573226273293e-05, 'alpha': 1.4289285595623368e-06, 'actor_loss': -9.076805854320526, 'temp': 0.1597569689899683, 'temp_loss': -0.002185049707244616, 'time_step': 0.04854298162460327, 'td_error': 5.7917748671848255, 'value_scale': 11.298446136394052, 'discounted_advantage': -8.754540068557505, 'initial_state': 15.340917587280273, 'diff_eval': 3466.6284868967864} step=149000
2025-12-06 13:31.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_149000.d3


Epoch 150/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.68it/s, critic_loss=12.5, conservative_loss=-7.14e-5, alpha=1.31e-6, actor_loss=-9.11, temp=0.162, temp_loss=-0.00173]


2025-12-06 13:32.42 [info     ] CalQL_20251206112128: epoch=150 step=150000 epoch=150 metrics={'time_sample_batch': 0.004985107660293579, 'time_algorithm_update': 0.04235424757003784, 'critic_loss': 12.460646249294282, 'conservative_loss': -7.138054035021924e-05, 'alpha': 1.3054602175088803e-06, 'actor_loss': -9.106066165447235, 'temp': 0.16177913358807564, 'temp_loss': -0.001745691290765535, 'time_step': 0.04762872576713562, 'td_error': 6.056339656846962, 'value_scale': 11.138853264140389, 'discounted_advantage': -8.813370721565315, 'initial_state': 14.0505952835083, 'diff_eval': 3535.2243921960307} step=150000
2025-12-06 13:32.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_150000.d3


Epoch 151/200: 100%|██████████| 1000/1000 [00:47<00:00, 20.86it/s, critic_loss=12.5, conservative_loss=-6.5e-5, alpha=1.19e-6, actor_loss=-9.14, temp=0.163, temp_loss=0.000299] 


2025-12-06 13:33.34 [info     ] CalQL_20251206112128: epoch=151 step=151000 epoch=151 metrics={'time_sample_batch': 0.005046797513961792, 'time_algorithm_update': 0.04187035036087036, 'critic_loss': 12.522501843452453, 'conservative_loss': -6.495881370574353e-05, 'alpha': 1.192853465340704e-06, 'actor_loss': -9.140508098125459, 'temp': 0.16265076045691967, 'temp_loss': 0.0004057835206622258, 'time_step': 0.04721680808067322, 'td_error': 6.541343830708019, 'value_scale': 11.44038802380334, 'discounted_advantage': -9.771770487831484, 'initial_state': 14.423303604125977, 'diff_eval': 3612.6539682177117} step=151000
2025-12-06 13:33.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_151000.d3


Epoch 152/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.32it/s, critic_loss=12.3, conservative_loss=-5.96e-5, alpha=1.09e-6, actor_loss=-9.11, temp=0.163, temp_loss=-0.00117]


2025-12-06 13:34.28 [info     ] CalQL_20251206112128: epoch=152 step=152000 epoch=152 metrics={'time_sample_batch': 0.005114192962646484, 'time_algorithm_update': 0.04310582542419433, 'critic_loss': 12.282726746559144, 'conservative_loss': -5.955962219377398e-05, 'alpha': 1.0898205638341096e-06, 'actor_loss': -9.112383408546448, 'temp': 0.16327299597859382, 'temp_loss': -0.0011065801330260002, 'time_step': 0.048500367879867556, 'td_error': 6.097417414973556, 'value_scale': 11.24762797698672, 'discounted_advantage': -9.536355179621522, 'initial_state': 14.503883361816406, 'diff_eval': 3591.3168462002673} step=152000
2025-12-06 13:34.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_152000.d3


Epoch 153/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.62it/s, critic_loss=12.4, conservative_loss=-5.44e-5, alpha=9.96e-7, actor_loss=-9.17, temp=0.164, temp_loss=-0.00115]


2025-12-06 13:35.20 [info     ] CalQL_20251206112128: epoch=153 step=153000 epoch=153 metrics={'time_sample_batch': 0.005230754613876343, 'time_algorithm_update': 0.0422610285282135, 'critic_loss': 12.37730215215683, 'conservative_loss': -5.433540156445815e-05, 'alpha': 9.955215251125083e-07, 'actor_loss': -9.1691248087883, 'temp': 0.1642695113569498, 'temp_loss': -0.0011464449296472595, 'time_step': 0.047792554855346676, 'td_error': 5.940694055380941, 'value_scale': 11.456280463917707, 'discounted_advantage': -8.960198582065283, 'initial_state': 14.135838508605957, 'diff_eval': 3543.8111622387078} step=153000
2025-12-06 13:35.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_153000.d3


Epoch 154/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.59it/s, critic_loss=12.4, conservative_loss=-4.97e-5, alpha=9.1e-7, actor_loss=-9.18, temp=0.166, temp_loss=-0.00158] 


2025-12-06 13:36.12 [info     ] CalQL_20251206112128: epoch=154 step=154000 epoch=154 metrics={'time_sample_batch': 0.005032824516296387, 'time_algorithm_update': 0.04249476218223572, 'critic_loss': 12.421239498853684, 'conservative_loss': -4.965054087006138e-05, 'alpha': 9.094277212398083e-07, 'actor_loss': -9.182295008182525, 'temp': 0.16583651584386824, 'temp_loss': -0.0015593289871467277, 'time_step': 0.04782801651954651, 'td_error': 6.019026061681894, 'value_scale': 11.319180934016233, 'discounted_advantage': -8.751919107777137, 'initial_state': 14.270513534545898, 'diff_eval': 3258.7083434062974} step=154000
2025-12-06 13:36.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_154000.d3


Epoch 155/200: 100%|██████████| 1000/1000 [00:50<00:00, 19.77it/s, critic_loss=12.3, conservative_loss=-4.55e-5, alpha=8.31e-7, actor_loss=-9.19, temp=0.168, temp_loss=-0.00209]


2025-12-06 13:37.06 [info     ] CalQL_20251206112128: epoch=155 step=155000 epoch=155 metrics={'time_sample_batch': 0.005171636819839478, 'time_algorithm_update': 0.044315481901168824, 'critic_loss': 12.262479589939117, 'conservative_loss': -4.5506951202696654e-05, 'alpha': 8.306246581923915e-07, 'actor_loss': -9.19043664264679, 'temp': 0.1676678798943758, 'temp_loss': -0.002092374440166168, 'time_step': 0.049793511629104614, 'td_error': 6.4877734718674285, 'value_scale': 11.610808342109744, 'discounted_advantage': -9.176952239281992, 'initial_state': 15.315590858459473, 'diff_eval': 3612.9102282895983} step=155000
2025-12-06 13:37.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_155000.d3


Epoch 156/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.23it/s, critic_loss=12.2, conservative_loss=-4.13e-5, alpha=7.59e-7, actor_loss=-9.2, temp=0.169, temp_loss=0.00115]  


2025-12-06 13:37.59 [info     ] CalQL_20251206112128: epoch=156 step=156000 epoch=156 metrics={'time_sample_batch': 0.005076274394989013, 'time_algorithm_update': 0.04329081392288208, 'critic_loss': 12.17987784433365, 'conservative_loss': -4.130929971870501e-05, 'alpha': 7.588873085637715e-07, 'actor_loss': -9.197647205352784, 'temp': 0.1685909006744623, 'temp_loss': 0.0012046330227749422, 'time_step': 0.04866329526901245, 'td_error': 6.131740529336116, 'value_scale': 11.194518550712075, 'discounted_advantage': -9.002909130875866, 'initial_state': 14.137857437133789, 'diff_eval': 3442.520051773875} step=156000
2025-12-06 13:37.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_156000.d3


Epoch 157/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.77it/s, critic_loss=12.3, conservative_loss=-3.79e-5, alpha=6.94e-7, actor_loss=-9.23, temp=0.168, temp_loss=8.9e-5]  


2025-12-06 13:38.51 [info     ] CalQL_20251206112128: epoch=157 step=157000 epoch=157 metrics={'time_sample_batch': 0.004974142074584961, 'time_algorithm_update': 0.042121857166290286, 'critic_loss': 12.288303843021392, 'conservative_loss': -3.789488027541665e-05, 'alpha': 6.932735144573598e-07, 'actor_loss': -9.227395911693574, 'temp': 0.16786427289247513, 'temp_loss': 0.00011059225106146186, 'time_step': 0.04740894985198975, 'td_error': 6.509041562561271, 'value_scale': 11.361883613618977, 'discounted_advantage': -9.188005075447785, 'initial_state': 13.297372817993164, 'diff_eval': 3680.748840636175} step=157000
2025-12-06 13:38.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_157000.d3


Epoch 158/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.54it/s, critic_loss=12.1, conservative_loss=-3.47e-5, alpha=6.33e-7, actor_loss=-9.31, temp=0.169, temp_loss=-0.00172]


2025-12-06 13:39.43 [info     ] CalQL_20251206112128: epoch=158 step=158000 epoch=158 metrics={'time_sample_batch': 0.005092980146408081, 'time_algorithm_update': 0.04257212829589844, 'critic_loss': 12.124364466667176, 'conservative_loss': -3.469708314878517e-05, 'alpha': 6.332050126616196e-07, 'actor_loss': -9.312174243450166, 'temp': 0.1686645300835371, 'temp_loss': -0.0016373901529586875, 'time_step': 0.047970144033432006, 'td_error': 6.261055901215524, 'value_scale': 11.538453419849727, 'discounted_advantage': -9.371881416404484, 'initial_state': 14.991366386413574, 'diff_eval': 3987.7459676334993} step=158000
2025-12-06 13:39.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_158000.d3


Epoch 159/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.48it/s, critic_loss=12.4, conservative_loss=-3.16e-5, alpha=5.79e-7, actor_loss=-9.41, temp=0.171, temp_loss=-0.00265]


2025-12-06 13:40.36 [info     ] CalQL_20251206112128: epoch=159 step=159000 epoch=159 metrics={'time_sample_batch': 0.005031773805618286, 'time_algorithm_update': 0.04274032711982727, 'critic_loss': 12.355729837417602, 'conservative_loss': -3.15833305594424e-05, 'alpha': 5.784510570947532e-07, 'actor_loss': -9.411830403327942, 'temp': 0.17110624645650388, 'temp_loss': -0.0026681018816307185, 'time_step': 0.04807075214385986, 'td_error': 5.9687053629442355, 'value_scale': 11.712648604461753, 'discounted_advantage': -9.66807440630075, 'initial_state': 15.552033424377441, 'diff_eval': 3416.106326200272} step=159000
2025-12-06 13:40.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_159000.d3


Epoch 160/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.59it/s, critic_loss=12.3, conservative_loss=-2.89e-5, alpha=5.29e-7, actor_loss=-9.58, temp=0.172, temp_loss=0.000394]


2025-12-06 13:41.28 [info     ] CalQL_20251206112128: epoch=160 step=160000 epoch=160 metrics={'time_sample_batch': 0.005056214094161987, 'time_algorithm_update': 0.042514451026916505, 'critic_loss': 12.27198490691185, 'conservative_loss': -2.8883794788271188e-05, 'alpha': 5.284475002440558e-07, 'actor_loss': -9.578862503051758, 'temp': 0.17164620384573936, 'temp_loss': 0.0005083409016951918, 'time_step': 0.04786285281181336, 'td_error': 6.3362393584763845, 'value_scale': 11.864685070046754, 'discounted_advantage': -9.745856047697457, 'initial_state': 14.947216033935547, 'diff_eval': 3535.118276632867} step=160000
2025-12-06 13:41.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_160000.d3


Epoch 161/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.36it/s, critic_loss=12.3, conservative_loss=-2.64e-5, alpha=4.83e-7, actor_loss=-9.7, temp=0.173, temp_loss=-0.00355] 


2025-12-06 13:42.21 [info     ] CalQL_20251206112128: epoch=161 step=161000 epoch=161 metrics={'time_sample_batch': 0.005146476507186889, 'time_algorithm_update': 0.04294371461868286, 'critic_loss': 12.343651571035386, 'conservative_loss': -2.6377102463811753e-05, 'alpha': 4.827483846270297e-07, 'actor_loss': -9.706741920471192, 'temp': 0.17338987469673156, 'temp_loss': -0.0035626266446197406, 'time_step': 0.048407676458358764, 'td_error': 6.478634336678429, 'value_scale': 12.328478916252324, 'discounted_advantage': -10.209828732004311, 'initial_state': 14.3573579788208, 'diff_eval': 3598.403117581496} step=161000
2025-12-06 13:42.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_161000.d3


Epoch 162/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.39it/s, critic_loss=12.7, conservative_loss=-2.4e-5, alpha=4.41e-7, actor_loss=-9.89, temp=0.177, temp_loss=-0.00155] 


2025-12-06 13:43.13 [info     ] CalQL_20251206112128: epoch=162 step=162000 epoch=162 metrics={'time_sample_batch': 0.005079679012298584, 'time_algorithm_update': 0.042900831937789916, 'critic_loss': 12.712552461147308, 'conservative_loss': -2.3958117573783967e-05, 'alpha': 4.4109341607168063e-07, 'actor_loss': -9.89073460483551, 'temp': 0.17659609147906302, 'temp_loss': -0.0015870382502907888, 'time_step': 0.04828910374641419, 'td_error': 6.503126876872305, 'value_scale': 13.03920143984152, 'discounted_advantage': -10.327055884808146, 'initial_state': 16.497533798217773, 'diff_eval': 3464.9149925318397} step=162000
2025-12-06 13:43.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_162000.d3


Epoch 163/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.44it/s, critic_loss=12.8, conservative_loss=-2.17e-5, alpha=4.03e-7, actor_loss=-10.1, temp=0.178, temp_loss=0.000898]


2025-12-06 13:44.06 [info     ] CalQL_20251206112128: epoch=163 step=163000 epoch=163 metrics={'time_sample_batch': 0.005074376583099365, 'time_algorithm_update': 0.0427990140914917, 'critic_loss': 12.753344784259797, 'conservative_loss': -2.1682576010789488e-05, 'alpha': 4.031624945355361e-07, 'actor_loss': -10.083637927055358, 'temp': 0.17754141809046267, 'temp_loss': 0.0009676269682822749, 'time_step': 0.048179233551025394, 'td_error': 6.807712811468727, 'value_scale': 13.010320925029667, 'discounted_advantage': -11.033694341257444, 'initial_state': 16.363191604614258, 'diff_eval': 3600.481605882335} step=163000
2025-12-06 13:44.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_163000.d3


Epoch 164/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.29it/s, critic_loss=13, conservative_loss=-1.98e-5, alpha=3.69e-7, actor_loss=-10.2, temp=0.175, temp_loss=0.00139] 


2025-12-06 13:44.59 [info     ] CalQL_20251206112128: epoch=164 step=164000 epoch=164 metrics={'time_sample_batch': 0.005189949512481689, 'time_algorithm_update': 0.04304540014266968, 'critic_loss': 13.020876469612121, 'conservative_loss': -1.983711250977649e-05, 'alpha': 3.684986291716541e-07, 'actor_loss': -10.210060669898986, 'temp': 0.17515515568852424, 'temp_loss': 0.0012894223043695091, 'time_step': 0.0485371208190918, 'td_error': 6.946004954250503, 'value_scale': 12.830741977576738, 'discounted_advantage': -11.377624971323465, 'initial_state': 15.797149658203125, 'diff_eval': 3271.8904907024485} step=164000
2025-12-06 13:44.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_164000.d3


Epoch 165/200: 100%|██████████| 1000/1000 [00:50<00:00, 19.90it/s, critic_loss=13.1, conservative_loss=-1.8e-5, alpha=3.37e-7, actor_loss=-10.3, temp=0.177, temp_loss=-0.00166]


2025-12-06 13:45.52 [info     ] CalQL_20251206112128: epoch=165 step=165000 epoch=165 metrics={'time_sample_batch': 0.0049815793037414555, 'time_algorithm_update': 0.04430821776390076, 'critic_loss': 13.07380250620842, 'conservative_loss': -1.8010525074714677e-05, 'alpha': 3.3674241768721913e-07, 'actor_loss': -10.34125462627411, 'temp': 0.17657195061445236, 'temp_loss': -0.0016236975971842184, 'time_step': 0.0495787889957428, 'td_error': 7.189827266725775, 'value_scale': 12.92720218634503, 'discounted_advantage': -10.52062492569308, 'initial_state': 15.438000679016113, 'diff_eval': 3706.565334794559} step=165000
2025-12-06 13:45.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_165000.d3


Epoch 166/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.44it/s, critic_loss=13.6, conservative_loss=-1.64e-5, alpha=3.08e-7, actor_loss=-10.5, temp=0.177, temp_loss=6.98e-5] 


2025-12-06 13:46.45 [info     ] CalQL_20251206112128: epoch=166 step=166000 epoch=166 metrics={'time_sample_batch': 0.005043809652328491, 'time_algorithm_update': 0.04281484246253967, 'critic_loss': 13.600046017885209, 'conservative_loss': -1.6437145728559698e-05, 'alpha': 3.0776554780231893e-07, 'actor_loss': -10.504227946281434, 'temp': 0.17680195209383964, 'temp_loss': 0.00014654674264602362, 'time_step': 0.048165988445281985, 'td_error': 7.232614029945396, 'value_scale': 12.635943931762414, 'discounted_advantage': -10.658057273851469, 'initial_state': 15.827231407165527, 'diff_eval': 3293.093482138758} step=166000
2025-12-06 13:46.45 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_166000.d3


Epoch 167/200: 100%|██████████| 1000/1000 [00:47<00:00, 20.99it/s, critic_loss=13.5, conservative_loss=-1.49e-5, alpha=2.81e-7, actor_loss=-10.6, temp=0.178, temp_loss=-0.00127]


2025-12-06 13:47.36 [info     ] CalQL_20251206112128: epoch=167 step=167000 epoch=167 metrics={'time_sample_batch': 0.004942537307739258, 'time_algorithm_update': 0.04171214890480041, 'critic_loss': 13.452079730510711, 'conservative_loss': -1.4939280793441867e-05, 'alpha': 2.8125354393182533e-07, 'actor_loss': -10.642341215133667, 'temp': 0.1780809539705515, 'temp_loss': -0.001213457561796531, 'time_step': 0.046940042018890384, 'td_error': 6.851992913347964, 'value_scale': 13.215704225202787, 'discounted_advantage': -10.896294455468478, 'initial_state': 17.27374267578125, 'diff_eval': 3442.856247709208} step=167000
2025-12-06 13:47.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_167000.d3


Epoch 168/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.60it/s, critic_loss=13.9, conservative_loss=-1.36e-5, alpha=2.57e-7, actor_loss=-10.8, temp=0.179, temp_loss=-0.000919]


2025-12-06 13:48.28 [info     ] CalQL_20251206112128: epoch=168 step=168000 epoch=168 metrics={'time_sample_batch': 0.005106272220611573, 'time_algorithm_update': 0.0424582633972168, 'critic_loss': 13.946571307897567, 'conservative_loss': -1.3604440006020013e-05, 'alpha': 2.570697312478387e-07, 'actor_loss': -10.819807923316956, 'temp': 0.17898175674676894, 'temp_loss': -0.0009812955914530904, 'time_step': 0.04785180640220642, 'td_error': 6.406429477458115, 'value_scale': 12.90858762366861, 'discounted_advantage': -10.431875115847836, 'initial_state': 17.44782257080078, 'diff_eval': 3298.7513750726866} step=168000
2025-12-06 13:48.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_168000.d3


Epoch 169/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.59it/s, critic_loss=13.9, conservative_loss=-1.24e-5, alpha=2.35e-7, actor_loss=-11, temp=0.181, temp_loss=-0.00149]


2025-12-06 13:49.20 [info     ] CalQL_20251206112128: epoch=169 step=169000 epoch=169 metrics={'time_sample_batch': 0.004987234354019165, 'time_algorithm_update': 0.04253409671783447, 'critic_loss': 13.949779369354248, 'conservative_loss': -1.238951644700137e-05, 'alpha': 2.3493606718716364e-07, 'actor_loss': -11.0264461517334, 'temp': 0.18083964715898038, 'temp_loss': -0.0015548334158957005, 'time_step': 0.047821681499481204, 'td_error': 7.585609277341245, 'value_scale': 13.538552208098661, 'discounted_advantage': -11.064522738692038, 'initial_state': 16.549793243408203, 'diff_eval': 3577.693681793808} step=169000
2025-12-06 13:49.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_169000.d3


Epoch 170/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.57it/s, critic_loss=14.6, conservative_loss=-1.12e-5, alpha=2.15e-7, actor_loss=-11.3, temp=0.181, temp_loss=0.000631]


2025-12-06 13:50.13 [info     ] CalQL_20251206112128: epoch=170 step=170000 epoch=170 metrics={'time_sample_batch': 0.005117160081863403, 'time_algorithm_update': 0.042472429752349855, 'critic_loss': 14.53548202419281, 'conservative_loss': -1.1225994582673593e-05, 'alpha': 2.147622652159953e-07, 'actor_loss': -11.265134822845459, 'temp': 0.18144793184101582, 'temp_loss': 0.0005146328726550564, 'time_step': 0.04788393259048462, 'td_error': 7.557813214652724, 'value_scale': 13.833329561395198, 'discounted_advantage': -12.01019794066582, 'initial_state': 16.670265197753906, 'diff_eval': 3570.2549362618674} step=170000
2025-12-06 13:50.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_170000.d3


Epoch 171/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.64it/s, critic_loss=14.8, conservative_loss=-1.01e-5, alpha=1.96e-7, actor_loss=-11.5, temp=0.18, temp_loss=0.000516] 


2025-12-06 13:51.04 [info     ] CalQL_20251206112128: epoch=171 step=171000 epoch=171 metrics={'time_sample_batch': 0.005044330835342407, 'time_algorithm_update': 0.04236172866821289, 'critic_loss': 14.841789509773255, 'conservative_loss': -1.0140553474229819e-05, 'alpha': 1.9639456975539815e-07, 'actor_loss': -11.524335854530335, 'temp': 0.18016821901500224, 'temp_loss': 0.0005229978805873543, 'time_step': 0.04770960831642151, 'td_error': 8.5094145208145, 'value_scale': 13.616962093247096, 'discounted_advantage': -11.894734792030837, 'initial_state': 14.46567153930664, 'diff_eval': 3769.402069503109} step=171000
2025-12-06 13:51.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_171000.d3


Epoch 172/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.28it/s, critic_loss=15.4, conservative_loss=-9.19e-6, alpha=1.8e-7, actor_loss=-11.8, temp=0.18, temp_loss=-5.63e-5] 


2025-12-06 13:51.57 [info     ] CalQL_20251206112128: epoch=172 step=172000 epoch=172 metrics={'time_sample_batch': 0.0051404938697814945, 'time_algorithm_update': 0.04314162874221802, 'critic_loss': 15.376040184497834, 'conservative_loss': -9.190369423777156e-06, 'alpha': 1.796134533122995e-07, 'actor_loss': -11.835789985656739, 'temp': 0.18033971871435642, 'temp_loss': -6.727712030988186e-05, 'time_step': 0.04857360744476318, 'td_error': 7.364540140168154, 'value_scale': 14.183204201391248, 'discounted_advantage': -11.364226550883961, 'initial_state': 18.04498863220215, 'diff_eval': 3702.3890510310403} step=172000
2025-12-06 13:51.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_172000.d3


Epoch 173/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.37it/s, critic_loss=15.6, conservative_loss=-8.31e-6, alpha=1.64e-7, actor_loss=-12.2, temp=0.18, temp_loss=0.000686]


2025-12-06 13:52.50 [info     ] CalQL_20251206112128: epoch=173 step=173000 epoch=173 metrics={'time_sample_batch': 0.005127326488494873, 'time_algorithm_update': 0.04289256811141968, 'critic_loss': 15.655932859420776, 'conservative_loss': -8.311135640724388e-06, 'alpha': 1.6425297846467402e-07, 'actor_loss': -12.174523362159729, 'temp': 0.18025105330348015, 'temp_loss': 0.0006410532843437977, 'time_step': 0.04832649540901184, 'td_error': 7.870759624523381, 'value_scale': 14.043961981985927, 'discounted_advantage': -12.671485771811174, 'initial_state': 18.055471420288086, 'diff_eval': 3655.596184700123} step=173000
2025-12-06 13:52.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_173000.d3


Epoch 174/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.55it/s, critic_loss=16.2, conservative_loss=-7.51e-6, alpha=1.5e-7, actor_loss=-12.5, temp=0.178, temp_loss=0.00127] 


2025-12-06 13:53.42 [info     ] CalQL_20251206112128: epoch=174 step=174000 epoch=174 metrics={'time_sample_batch': 0.0050085711479187014, 'time_algorithm_update': 0.04262859392166138, 'critic_loss': 16.215715457439423, 'conservative_loss': -7.508823325224512e-06, 'alpha': 1.5025392544032455e-07, 'actor_loss': -12.55136386203766, 'temp': 0.17844733050465583, 'temp_loss': 0.0012753497655503452, 'time_step': 0.04794583654403686, 'td_error': 7.935218555048991, 'value_scale': 14.832170586043162, 'discounted_advantage': -13.462184293407887, 'initial_state': 18.210927963256836, 'diff_eval': 4615.305859160371} step=174000
2025-12-06 13:53.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_174000.d3


Epoch 175/200: 100%|██████████| 1000/1000 [00:50<00:00, 19.83it/s, critic_loss=16.8, conservative_loss=-6.76e-6, alpha=1.38e-7, actor_loss=-13, temp=0.177, temp_loss=0.00258]  


2025-12-06 13:54.36 [info     ] CalQL_20251206112128: epoch=175 step=175000 epoch=175 metrics={'time_sample_batch': 0.005145646333694458, 'time_algorithm_update': 0.04418811774253845, 'critic_loss': 16.821334983825682, 'conservative_loss': -6.757378890142718e-06, 'alpha': 1.3745748216820175e-07, 'actor_loss': -12.966255571365357, 'temp': 0.17668038009107112, 'temp_loss': 0.002650602383771911, 'time_step': 0.049644711017608645, 'td_error': 8.259687269455023, 'value_scale': 14.595311360701462, 'discounted_advantage': -11.894529331317933, 'initial_state': 17.765308380126953, 'diff_eval': 3662.523259992406} step=175000
2025-12-06 13:54.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_175000.d3


Epoch 176/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.28it/s, critic_loss=17.7, conservative_loss=-6.15e-6, alpha=1.26e-7, actor_loss=-13.5, temp=0.174, temp_loss=0.00102]


2025-12-06 13:55.29 [info     ] CalQL_20251206112128: epoch=176 step=176000 epoch=176 metrics={'time_sample_batch': 0.005151896476745605, 'time_algorithm_update': 0.04303842568397522, 'critic_loss': 17.644574595212937, 'conservative_loss': -6.152025434857933e-06, 'alpha': 1.257611755391963e-07, 'actor_loss': -13.458538764953614, 'temp': 0.17366862590610982, 'temp_loss': 0.0011135269839433022, 'time_step': 0.04849608850479126, 'td_error': 8.80179145185934, 'value_scale': 15.3591267635885, 'discounted_advantage': -15.120888478931954, 'initial_state': 18.92877769470215, 'diff_eval': 4174.111083243653} step=176000
2025-12-06 13:55.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_176000.d3


Epoch 177/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.09it/s, critic_loss=18, conservative_loss=-5.5e-6, alpha=1.15e-7, actor_loss=-14, temp=0.172, temp_loss=0.00264]      


2025-12-06 13:56.22 [info     ] CalQL_20251206112128: epoch=177 step=177000 epoch=177 metrics={'time_sample_batch': 0.005162660837173462, 'time_algorithm_update': 0.04357061052322388, 'critic_loss': 17.981160960674284, 'conservative_loss': -5.495076763054385e-06, 'alpha': 1.1506005192529756e-07, 'actor_loss': -14.023116946220398, 'temp': 0.1724189608693123, 'temp_loss': 0.0024760019470704718, 'time_step': 0.04902793526649475, 'td_error': 8.871108513213988, 'value_scale': 15.93529869003373, 'discounted_advantage': -14.498753946975585, 'initial_state': 19.192218780517578, 'diff_eval': 3760.818146112264} step=177000
2025-12-06 13:56.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_177000.d3


Epoch 178/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.66it/s, critic_loss=18.9, conservative_loss=-4.91e-6, alpha=1.05e-7, actor_loss=-14.7, temp=0.17, temp_loss=0.000224] 


2025-12-06 13:57.14 [info     ] CalQL_20251206112128: epoch=178 step=178000 epoch=178 metrics={'time_sample_batch': 0.0050783085823059085, 'time_algorithm_update': 0.042286501884460446, 'critic_loss': 18.861631041526795, 'conservative_loss': -4.912393441600216e-06, 'alpha': 1.0534830195041422e-07, 'actor_loss': -14.684983762741089, 'temp': 0.17000838497281073, 'temp_loss': 0.00014260415791068226, 'time_step': 0.0476648337841034, 'td_error': 9.085669001714319, 'value_scale': 16.34573788658309, 'discounted_advantage': -14.891228416770705, 'initial_state': 19.479175567626953, 'diff_eval': 4406.330025072599} step=178000
2025-12-06 13:57.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_178000.d3


Epoch 179/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.05it/s, critic_loss=20, conservative_loss=-4.41e-6, alpha=9.65e-8, actor_loss=-15.4, temp=0.172, temp_loss=-0.000988]


2025-12-06 13:58.08 [info     ] CalQL_20251206112128: epoch=179 step=179000 epoch=179 metrics={'time_sample_batch': 0.0052382035255432126, 'time_algorithm_update': 0.04355002498626709, 'critic_loss': 20.02900454711914, 'conservative_loss': -4.402605556379058e-06, 'alpha': 9.650515119119518e-08, 'actor_loss': -15.416744359016418, 'temp': 0.17154301790893078, 'temp_loss': -0.0009198763004387728, 'time_step': 0.049097975730896, 'td_error': 9.447441293973915, 'value_scale': 17.06490818920724, 'discounted_advantage': -16.41195464636419, 'initial_state': 20.366769790649414, 'diff_eval': 4362.7922032531005} step=179000
2025-12-06 13:58.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_179000.d3


Epoch 180/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.53it/s, critic_loss=21, conservative_loss=-3.92e-6, alpha=8.84e-8, actor_loss=-16.3, temp=0.171, temp_loss=0.000407] 


2025-12-06 13:59.00 [info     ] CalQL_20251206112128: epoch=180 step=180000 epoch=180 metrics={'time_sample_batch': 0.00507223892211914, 'time_algorithm_update': 0.04264672541618347, 'critic_loss': 20.975903897762297, 'conservative_loss': -3.91579876236392e-06, 'alpha': 8.84112875212395e-08, 'actor_loss': -16.283530117988587, 'temp': 0.17105450227856636, 'temp_loss': 0.0003891712527256459, 'time_step': 0.04800703239440918, 'td_error': 10.384159184587636, 'value_scale': 17.618494114689437, 'discounted_advantage': -17.2074220124614, 'initial_state': 19.545621871948242, 'diff_eval': 4665.717664230708} step=180000
2025-12-06 13:59.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_180000.d3


Epoch 181/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.19it/s, critic_loss=22.8, conservative_loss=-3.51e-6, alpha=8.1e-8, actor_loss=-17.2, temp=0.172, temp_loss=-0.00184] 


2025-12-06 13:59.53 [info     ] CalQL_20251206112128: epoch=181 step=181000 epoch=181 metrics={'time_sample_batch': 0.005080474615097046, 'time_algorithm_update': 0.04337230134010315, 'critic_loss': 22.802808649539948, 'conservative_loss': -3.5083308639514143e-06, 'alpha': 8.09996644761668e-08, 'actor_loss': -17.226793849945068, 'temp': 0.17244039209187031, 'temp_loss': -0.0019225690494058653, 'time_step': 0.04876443362236023, 'td_error': 10.91889382418981, 'value_scale': 18.037075479773293, 'discounted_advantage': -18.11418935373311, 'initial_state': 19.466991424560547, 'diff_eval': 4659.891619169916} step=181000
2025-12-06 13:59.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_181000.d3


Epoch 182/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.81it/s, critic_loss=24.4, conservative_loss=-3.08e-6, alpha=7.43e-8, actor_loss=-18.4, temp=0.174, temp_loss=-0.00224]


2025-12-06 14:00.45 [info     ] CalQL_20251206112128: epoch=182 step=182000 epoch=182 metrics={'time_sample_batch': 0.0049961872100830074, 'time_algorithm_update': 0.042053470849990844, 'critic_loss': 24.385959462165832, 'conservative_loss': -3.075389957757579e-06, 'alpha': 7.426037388569285e-08, 'actor_loss': -18.36184756088257, 'temp': 0.17440751361846923, 'temp_loss': -0.0023154870230937376, 'time_step': 0.04734279227256775, 'td_error': 11.096911565190835, 'value_scale': 18.877121992752716, 'discounted_advantage': -18.41327704484863, 'initial_state': 20.8846378326416, 'diff_eval': 3966.7781583070428} step=182000
2025-12-06 14:00.45 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_182000.d3


Epoch 183/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.56it/s, critic_loss=26.3, conservative_loss=-2.68e-6, alpha=6.82e-8, actor_loss=-19.7, temp=0.177, temp_loss=-0.0029] 


2025-12-06 14:01.37 [info     ] CalQL_20251206112128: epoch=183 step=183000 epoch=183 metrics={'time_sample_batch': 0.005074999570846558, 'time_algorithm_update': 0.042559754133224485, 'critic_loss': 26.29362201118469, 'conservative_loss': -2.677811598800872e-06, 'alpha': 6.81789660177401e-08, 'actor_loss': -19.706242116928102, 'temp': 0.17728294736146927, 'temp_loss': -0.0028957250401144847, 'time_step': 0.0479256854057312, 'td_error': 13.10045153177712, 'value_scale': 20.397064753335783, 'discounted_advantage': -21.619136117546173, 'initial_state': 21.444570541381836, 'diff_eval': 4950.196604596978} step=183000
2025-12-06 14:01.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_183000.d3


Epoch 184/200: 100%|██████████| 1000/1000 [00:47<00:00, 20.87it/s, critic_loss=28.9, conservative_loss=-2.37e-6, alpha=6.26e-8, actor_loss=-21.1, temp=0.181, temp_loss=-0.00117]


2025-12-06 14:02.28 [info     ] CalQL_20251206112128: epoch=184 step=184000 epoch=184 metrics={'time_sample_batch': 0.004980280637741089, 'time_algorithm_update': 0.041896028995513916, 'critic_loss': 28.88296041870117, 'conservative_loss': -2.370434873114391e-06, 'alpha': 6.261038121380125e-08, 'actor_loss': -21.059636337280274, 'temp': 0.18060077936947347, 'temp_loss': -0.0011537013131892309, 'time_step': 0.04717711067199707, 'td_error': 13.103524793667715, 'value_scale': 21.711378916120665, 'discounted_advantage': -21.746128038738995, 'initial_state': 24.626806259155273, 'diff_eval': 5739.64264263831} step=184000
2025-12-06 14:02.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_184000.d3


Epoch 185/200: 100%|██████████| 1000/1000 [00:51<00:00, 19.36it/s, critic_loss=31.8, conservative_loss=-2.04e-6, alpha=5.76e-8, actor_loss=-22.6, temp=0.183, temp_loss=-0.00531]


2025-12-06 14:03.24 [info     ] CalQL_20251206112128: epoch=185 step=185000 epoch=185 metrics={'time_sample_batch': 0.006038739919662476, 'time_algorithm_update': 0.04456288623809814, 'critic_loss': 31.83536388015747, 'conservative_loss': -2.037608059026752e-06, 'alpha': 5.754250435785479e-08, 'actor_loss': -22.582569051742553, 'temp': 0.18305309011042117, 'temp_loss': -0.0053426803402253425, 'time_step': 0.05090554761886597, 'td_error': 14.854322328437704, 'value_scale': 23.646563224720847, 'discounted_advantage': -22.874037814753652, 'initial_state': 23.81505584716797, 'diff_eval': 4009.6398181834415} step=185000
2025-12-06 14:03.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_185000.d3


Epoch 186/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.22it/s, critic_loss=34.7, conservative_loss=-1.74e-6, alpha=5.3e-8, actor_loss=-24.3, temp=0.19, temp_loss=-0.0053]  


2025-12-06 14:04.17 [info     ] CalQL_20251206112128: epoch=186 step=186000 epoch=186 metrics={'time_sample_batch': 0.005149072408676148, 'time_algorithm_update': 0.04324666023254395, 'critic_loss': 34.761691917419434, 'conservative_loss': -1.7346572471979015e-06, 'alpha': 5.2951395286271466e-08, 'actor_loss': -24.267499071121215, 'temp': 0.19018073743581773, 'temp_loss': -0.005201027425006032, 'time_step': 0.048691261768341064, 'td_error': 15.713840423950431, 'value_scale': 24.319567039085914, 'discounted_advantage': -25.634996473932837, 'initial_state': 25.288066864013672, 'diff_eval': 5098.275749659773} step=186000
2025-12-06 14:04.17 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_186000.d3


Epoch 187/200: 100%|██████████| 1000/1000 [00:50<00:00, 19.62it/s, critic_loss=37.6, conservative_loss=-1.44e-6, alpha=4.89e-8, actor_loss=-26.1, temp=0.195, temp_loss=-0.00379]


2025-12-06 14:05.11 [info     ] CalQL_20251206112128: epoch=187 step=187000 epoch=187 metrics={'time_sample_batch': 0.005487279176712036, 'time_algorithm_update': 0.04445992302894592, 'critic_loss': 37.58160122966766, 'conservative_loss': -1.4335070904394343e-06, 'alpha': 4.8860138946338335e-08, 'actor_loss': -26.10966170310974, 'temp': 0.19533873449265957, 'temp_loss': -0.003789788193302229, 'time_step': 0.05024363422393799, 'td_error': 17.74486194502686, 'value_scale': 26.483518578424388, 'discounted_advantage': -28.143580272191727, 'initial_state': 27.288789749145508, 'diff_eval': 5996.577984989623} step=187000
2025-12-06 14:05.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_187000.d3


Epoch 188/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.67it/s, critic_loss=41.9, conservative_loss=-1.22e-6, alpha=4.52e-8, actor_loss=-28, temp=0.2, temp_loss=-0.00314]   


2025-12-06 14:06.03 [info     ] CalQL_20251206112128: epoch=188 step=188000 epoch=188 metrics={'time_sample_batch': 0.00497783637046814, 'time_algorithm_update': 0.04233782458305359, 'critic_loss': 41.87886626815796, 'conservative_loss': -1.2169506544807974e-06, 'alpha': 4.514329936355921e-08, 'actor_loss': -27.96425599861145, 'temp': 0.19985213777422906, 'temp_loss': -0.003128658451139927, 'time_step': 0.047617372035980224, 'td_error': 16.81222054202274, 'value_scale': 27.013669137328503, 'discounted_advantage': -26.517705802306295, 'initial_state': 28.6334171295166, 'diff_eval': 4494.298119044487} step=188000
2025-12-06 14:06.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_188000.d3


Epoch 189/200: 100%|██████████| 1000/1000 [00:49<00:00, 20.41it/s, critic_loss=45.5, conservative_loss=-9.84e-7, alpha=4.18e-8, actor_loss=-29.9, temp=0.204, temp_loss=-0.00342]


2025-12-06 14:06.56 [info     ] CalQL_20251206112128: epoch=189 step=189000 epoch=189 metrics={'time_sample_batch': 0.005099535703659058, 'time_algorithm_update': 0.042845868110656736, 'critic_loss': 45.52196272468567, 'conservative_loss': -9.828453756455248e-07, 'alpha': 4.176671066957738e-08, 'actor_loss': -29.906154220581055, 'temp': 0.20395750892162323, 'temp_loss': -0.0034040052168420516, 'time_step': 0.048248961925506595, 'td_error': 17.655596319028298, 'value_scale': 28.50144345042267, 'discounted_advantage': -28.229006713512593, 'initial_state': 30.491981506347656, 'diff_eval': 5391.142744120082} step=189000
2025-12-06 14:06.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_189000.d3


Epoch 190/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.82it/s, critic_loss=49.6, conservative_loss=-8.09e-7, alpha=3.88e-8, actor_loss=-31.7, temp=0.209, temp_loss=-0.00446]


2025-12-06 14:07.47 [info     ] CalQL_20251206112128: epoch=190 step=190000 epoch=190 metrics={'time_sample_batch': 0.005014343023300171, 'time_algorithm_update': 0.041969079971313474, 'critic_loss': 49.61480434036255, 'conservative_loss': -8.083501875830734e-07, 'alpha': 3.8753017097548085e-08, 'actor_loss': -31.68519660568237, 'temp': 0.20899664357304573, 'temp_loss': -0.004486376902379561, 'time_step': 0.04728550744056702, 'td_error': 20.61066307668076, 'value_scale': 29.35669201459464, 'discounted_advantage': -28.175934822454565, 'initial_state': 28.831865310668945, 'diff_eval': 5295.953946502854} step=190000
2025-12-06 14:07.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_190000.d3


Epoch 191/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.56it/s, critic_loss=53.6, conservative_loss=-6.73e-7, alpha=3.6e-8, actor_loss=-33.2, temp=0.215, temp_loss=-0.00526]


2025-12-06 14:08.40 [info     ] CalQL_20251206112128: epoch=191 step=191000 epoch=191 metrics={'time_sample_batch': 0.005059499263763428, 'time_algorithm_update': 0.04251702332496643, 'critic_loss': 53.60842475700378, 'conservative_loss': -6.733191647327886e-07, 'alpha': 3.597749589445698e-08, 'actor_loss': -33.247522686004636, 'temp': 0.21535470196604728, 'temp_loss': -0.005465332832362037, 'time_step': 0.04789270305633545, 'td_error': 20.13551501654008, 'value_scale': 30.675757142069553, 'discounted_advantage': -30.559813287280477, 'initial_state': 35.49454116821289, 'diff_eval': 5484.932662339855} step=191000
2025-12-06 14:08.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_191000.d3


Epoch 192/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.62it/s, critic_loss=57.6, conservative_loss=-5.29e-7, alpha=3.36e-8, actor_loss=-34.7, temp=0.222, temp_loss=-0.00603]


2025-12-06 14:09.32 [info     ] CalQL_20251206112128: epoch=192 step=192000 epoch=192 metrics={'time_sample_batch': 0.005059201002120972, 'time_algorithm_update': 0.04245879602432251, 'critic_loss': 57.63430976486206, 'conservative_loss': -5.286033481759489e-07, 'alpha': 3.3545084274777535e-08, 'actor_loss': -34.74553735160828, 'temp': 0.2220678745508194, 'temp_loss': -0.0061494058100506665, 'time_step': 0.04781333947181702, 'td_error': 20.162166713989077, 'value_scale': 30.95362587315901, 'discounted_advantage': -29.150946560177545, 'initial_state': 36.663143157958984, 'diff_eval': 6192.639159789815} step=192000
2025-12-06 14:09.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_192000.d3


Epoch 193/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.64it/s, critic_loss=60, conservative_loss=-4.3e-7, alpha=3.13e-8, actor_loss=-35.7, temp=0.231, temp_loss=-0.00605]  


2025-12-06 14:10.24 [info     ] CalQL_20251206112128: epoch=193 step=193000 epoch=193 metrics={'time_sample_batch': 0.005073427677154541, 'time_algorithm_update': 0.04237345004081726, 'critic_loss': 60.046922765731814, 'conservative_loss': -4.307355991675088e-07, 'alpha': 3.129886851738206e-08, 'actor_loss': -35.67524333572388, 'temp': 0.23076637549698353, 'temp_loss': -0.006208528689807281, 'time_step': 0.04774694490432739, 'td_error': 21.677209365248324, 'value_scale': 30.92367024058968, 'discounted_advantage': -28.40654289562678, 'initial_state': 34.22520446777344, 'diff_eval': 5765.617607224305} step=193000
2025-12-06 14:10.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_193000.d3


Epoch 194/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.52it/s, critic_loss=62.1, conservative_loss=-3.99e-7, alpha=2.92e-8, actor_loss=-36.1, temp=0.238, temp_loss=-0.00532]


2025-12-06 14:11.16 [info     ] CalQL_20251206112128: epoch=194 step=194000 epoch=194 metrics={'time_sample_batch': 0.005099419832229614, 'time_algorithm_update': 0.04258205842971802, 'critic_loss': 62.040246007919315, 'conservative_loss': -3.997800634589765e-07, 'alpha': 2.9204888825518082e-08, 'actor_loss': -36.109890224456784, 'temp': 0.23815673190355302, 'temp_loss': -0.005240402554511092, 'time_step': 0.047981428623199464, 'td_error': 23.564266259489898, 'value_scale': 29.99061883157486, 'discounted_advantage': -26.58677922933812, 'initial_state': 32.7791633605957, 'diff_eval': 6708.40361225471} step=194000
2025-12-06 14:11.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_194000.d3


Epoch 195/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.54it/s, critic_loss=63.7, conservative_loss=-3.38e-7, alpha=2.73e-8, actor_loss=-36, temp=0.245, temp_loss=-0.00619] 


2025-12-06 14:12.09 [info     ] CalQL_20251206112128: epoch=195 step=195000 epoch=195 metrics={'time_sample_batch': 0.005055984973907471, 'time_algorithm_update': 0.042621632337570194, 'critic_loss': 63.70714898872375, 'conservative_loss': -3.37185424548192e-07, 'alpha': 2.7250887063345886e-08, 'actor_loss': -35.97020149612427, 'temp': 0.24461226838827133, 'temp_loss': -0.006089610897703096, 'time_step': 0.047976075887680054, 'td_error': 22.731299588682315, 'value_scale': 28.38242908089741, 'discounted_advantage': -24.88542619752435, 'initial_state': 32.686195373535156, 'diff_eval': 7121.910667789402} step=195000
2025-12-06 14:12.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_195000.d3


Epoch 196/200: 100%|██████████| 1000/1000 [00:50<00:00, 19.98it/s, critic_loss=64.1, conservative_loss=-3.21e-7, alpha=2.54e-8, actor_loss=-35.1, temp=0.254, temp_loss=-0.00734]


2025-12-06 14:13.03 [info     ] CalQL_20251206112128: epoch=196 step=196000 epoch=196 metrics={'time_sample_batch': 0.005370345830917359, 'time_algorithm_update': 0.043665851831436155, 'critic_loss': 64.02709822654724, 'conservative_loss': -3.223604802649049e-07, 'alpha': 2.537066901275864e-08, 'actor_loss': -35.0360934715271, 'temp': 0.25368829031288626, 'temp_loss': -0.007412642905022949, 'time_step': 0.049328320026397705, 'td_error': 20.89225878272358, 'value_scale': 26.501034482557884, 'discounted_advantage': -23.25736672321569, 'initial_state': 32.817100524902344, 'diff_eval': 6239.746738481898} step=196000
2025-12-06 14:13.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_196000.d3


Epoch 197/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.55it/s, critic_loss=64.2, conservative_loss=-3.54e-7, alpha=2.34e-8, actor_loss=-33.5, temp=0.264, temp_loss=-0.00891]


2025-12-06 14:13.55 [info     ] CalQL_20251206112128: epoch=197 step=197000 epoch=197 metrics={'time_sample_batch': 0.005127594947814941, 'time_algorithm_update': 0.0425044093132019, 'critic_loss': 64.11631082344056, 'conservative_loss': -3.5479525323101057e-07, 'alpha': 2.3416807513498837e-08, 'actor_loss': -33.45882013130188, 'temp': 0.26362612038850786, 'temp_loss': -0.008933186457958072, 'time_step': 0.047932039260864256, 'td_error': 21.57888693692376, 'value_scale': 25.067968075971642, 'discounted_advantage': -21.078145535264476, 'initial_state': 30.928363800048828, 'diff_eval': 6256.83577401731} step=197000
2025-12-06 14:13.55 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_197000.d3


Epoch 198/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.46it/s, critic_loss=61.5, conservative_loss=-3.81e-7, alpha=2.15e-8, actor_loss=-31.2, temp=0.272, temp_loss=-0.00334]


2025-12-06 14:14.48 [info     ] CalQL_20251206112128: epoch=198 step=198000 epoch=198 metrics={'time_sample_batch': 0.005068559408187867, 'time_algorithm_update': 0.04275270438194275, 'critic_loss': 61.399059177398684, 'conservative_loss': -3.810433503206312e-07, 'alpha': 2.1455427837935304e-08, 'actor_loss': -31.241142042160035, 'temp': 0.2720737738907337, 'temp_loss': -0.0033916397684952246, 'time_step': 0.04813035249710083, 'td_error': 19.59537053226098, 'value_scale': 22.683663780427846, 'discounted_advantage': -18.50788561201597, 'initial_state': 31.7165470123291, 'diff_eval': 7435.39966519317} step=198000
2025-12-06 14:14.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_198000.d3


Epoch 199/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.69it/s, critic_loss=56.9, conservative_loss=-4.21e-7, alpha=1.96e-8, actor_loss=-28.5, temp=0.275, temp_loss=-0.00233]


2025-12-06 14:15.40 [info     ] CalQL_20251206112128: epoch=199 step=199000 epoch=199 metrics={'time_sample_batch': 0.005032776594161987, 'time_algorithm_update': 0.04227984356880188, 'critic_loss': 56.82188724899292, 'conservative_loss': -4.218811099185871e-07, 'alpha': 1.955140609055661e-08, 'actor_loss': -28.51973804092407, 'temp': 0.274694921284914, 'temp_loss': -0.0020116152458358556, 'time_step': 0.047618873357772824, 'td_error': 19.484727914495572, 'value_scale': 19.41228454916645, 'discounted_advantage': -16.856491886804697, 'initial_state': 24.786632537841797, 'diff_eval': 7779.982830803817} step=199000
2025-12-06 14:15.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_199000.d3


Epoch 200/200: 100%|██████████| 1000/1000 [00:48<00:00, 20.73it/s, critic_loss=52.3, conservative_loss=-4.47e-7, alpha=1.78e-8, actor_loss=-25.2, temp=0.275, temp_loss=0.00155]


2025-12-06 14:16.31 [info     ] CalQL_20251206112128: epoch=200 step=200000 epoch=200 metrics={'time_sample_batch': 0.0050082533359527585, 'time_algorithm_update': 0.04226784014701843, 'critic_loss': 52.213556146621706, 'conservative_loss': -4.473659218504622e-07, 'alpha': 1.7747451066085773e-08, 'actor_loss': -25.144840084075927, 'temp': 0.27504540434479713, 'temp_loss': 0.0015853817723691464, 'time_step': 0.04756490635871887, 'td_error': 15.91112026150554, 'value_scale': 18.34350464887736, 'discounted_advantage': -15.713228868725404, 'initial_state': 25.918537139892578, 'diff_eval': 8029.186980147664} step=200000
2025-12-06 14:16.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\CalQL_20251206112128\model_200000.d3
Training model:  AWAC
2025-12-06 14:16.31 [info     ] dataset info                   dataset_info=DatasetInfo(observation_signature=Signature(dtype=[dtype('float64')], shape=[(7,)]), action_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), reward_sig

Epoch 1/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.44it/s, critic_loss=4.72, actor_loss=4.51e+3, temp=0, temp_loss=0]


2025-12-06 14:17.14 [info     ] AWAC_20251206141631: epoch=1 step=1000 epoch=1 metrics={'time_sample_batch': 0.019288463830947875, 'time_algorithm_update': 0.019064210414886474, 'critic_loss': 4.730847250461578, 'actor_loss': 4462.571756175995, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03862885975837708, 'td_error': 17.98932147421463, 'value_scale': -6.07263742487531, 'discounted_advantage': -23.544108029047806, 'initial_state': -8.92847728729248, 'diff_eval': 4133.248883234633} step=1000
2025-12-06 14:17.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_1000.d3


Epoch 2/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.12it/s, critic_loss=5.99, actor_loss=-1.7e+3, temp=0, temp_loss=0]


2025-12-06 14:17.57 [info     ] AWAC_20251206141631: epoch=2 step=2000 epoch=2 metrics={'time_sample_batch': 0.019821857690811157, 'time_algorithm_update': 0.019038296937942505, 'critic_loss': 5.989648578166961, 'actor_loss': -1697.283584777832, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03913462018966675, 'td_error': 46.4936011191377, 'value_scale': -5.2994574061973525, 'discounted_advantage': -45.90437656649223, 'initial_state': -13.438501358032227, 'diff_eval': 5009.747313424753} step=2000
2025-12-06 14:17.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_2000.d3


Epoch 3/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.75it/s, critic_loss=4.04, actor_loss=-1.97e+3, temp=0, temp_loss=0]


2025-12-06 14:18.41 [info     ] AWAC_20251206141631: epoch=3 step=3000 epoch=3 metrics={'time_sample_batch': 0.019854222059249877, 'time_algorithm_update': 0.019544638633728028, 'critic_loss': 4.035493700504303, 'actor_loss': -1972.3131243896485, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03969947385787964, 'td_error': 43.49961881252226, 'value_scale': -5.2625155219994015, 'discounted_advantage': -44.04215355717633, 'initial_state': -13.9186429977417, 'diff_eval': 4673.4577925230315} step=3000
2025-12-06 14:18.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_3000.d3


Epoch 4/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.10it/s, critic_loss=2.88, actor_loss=-2.07e+3, temp=0, temp_loss=0]


2025-12-06 14:19.25 [info     ] AWAC_20251206141631: epoch=4 step=4000 epoch=4 metrics={'time_sample_batch': 0.01970492458343506, 'time_algorithm_update': 0.019184419870376588, 'critic_loss': 2.876409879684448, 'actor_loss': -2075.331704345703, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03915559768676758, 'td_error': 46.425738652753566, 'value_scale': -5.305928895690643, 'discounted_advantage': -45.75423763241365, 'initial_state': -14.445533752441406, 'diff_eval': 4512.624485890482} step=4000
2025-12-06 14:19.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_4000.d3


Epoch 5/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.09it/s, critic_loss=2.55, actor_loss=-2.24e+3, temp=0, temp_loss=0]


2025-12-06 14:20.08 [info     ] AWAC_20251206141631: epoch=5 step=5000 epoch=5 metrics={'time_sample_batch': 0.019622644662857056, 'time_algorithm_update': 0.019234165906906128, 'critic_loss': 2.553721045613289, 'actor_loss': -2237.727841064453, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03914759278297424, 'td_error': 59.67384426934683, 'value_scale': -5.094329386179032, 'discounted_advantage': -52.54754192083043, 'initial_state': -15.46017074584961, 'diff_eval': 4508.3030749632335} step=5000
2025-12-06 14:20.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_5000.d3


Epoch 6/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.52it/s, critic_loss=2.5, actor_loss=-2.46e+3, temp=0, temp_loss=0]


2025-12-06 14:20.51 [info     ] AWAC_20251206141631: epoch=6 step=6000 epoch=6 metrics={'time_sample_batch': 0.019411439657211305, 'time_algorithm_update': 0.01883932876586914, 'critic_loss': 2.497121015071869, 'actor_loss': -2460.5931948242187, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03850931334495544, 'td_error': 64.57474830345545, 'value_scale': -5.572902249068503, 'discounted_advantage': -54.974578093763746, 'initial_state': -16.292776107788086, 'diff_eval': 4368.196570215063} step=6000
2025-12-06 14:20.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_6000.d3


Epoch 7/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.45it/s, critic_loss=3.29, actor_loss=-2.75e+3, temp=0, temp_loss=0]


2025-12-06 14:21.34 [info     ] AWAC_20251206141631: epoch=7 step=7000 epoch=7 metrics={'time_sample_batch': 0.019307487487792967, 'time_algorithm_update': 0.019013458728790282, 'critic_loss': 3.295181943178177, 'actor_loss': -2750.5803660888673, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03860225510597229, 'td_error': 254.20635357766645, 'value_scale': 0.27839117738506497, 'discounted_advantage': -116.22361260891044, 'initial_state': 0.014585733413696289, 'diff_eval': 8007.238336721758} step=7000
2025-12-06 14:21.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_7000.d3


Epoch 8/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.56it/s, critic_loss=2.86, actor_loss=-3.13e+3, temp=0, temp_loss=0]


2025-12-06 14:22.19 [info     ] AWAC_20251206141631: epoch=8 step=8000 epoch=8 metrics={'time_sample_batch': 0.02035869359970093, 'time_algorithm_update': 0.01924270462989807, 'critic_loss': 2.8544334651231766, 'actor_loss': -3127.1476171875, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03999378180503845, 'td_error': 254.1159144443285, 'value_scale': 0.11399981549613908, 'discounted_advantage': -115.88642744199089, 'initial_state': -0.1340295672416687, 'diff_eval': 8006.22569014522} step=8000
2025-12-06 14:22.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_8000.d3


Epoch 9/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.62it/s, critic_loss=2.8, actor_loss=-3.45e+3, temp=0, temp_loss=0]


2025-12-06 14:23.02 [info     ] AWAC_20251206141631: epoch=9 step=9000 epoch=9 metrics={'time_sample_batch': 0.01935063862800598, 'time_algorithm_update': 0.018708065748214722, 'critic_loss': 2.798806191921234, 'actor_loss': -3455.174298828125, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03833590531349182, 'td_error': 474.0224928903342, 'value_scale': -0.519390741419375, 'discounted_advantage': -154.41887616316336, 'initial_state': -1.4560372829437256, 'diff_eval': 8015.783273337173} step=9000
2025-12-06 14:23.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_9000.d3


Epoch 10/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.69it/s, critic_loss=2.41, actor_loss=-3.77e+3, temp=0, temp_loss=0]


2025-12-06 14:23.46 [info     ] AWAC_20251206141631: epoch=10 step=10000 epoch=10 metrics={'time_sample_batch': 0.02007137417793274, 'time_algorithm_update': 0.019453735828399658, 'critic_loss': 2.4115497249364855, 'actor_loss': -3773.70573046875, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03980468583106995, 'td_error': 525.7971767122157, 'value_scale': 0.7580436810020479, 'discounted_advantage': -162.3039654998923, 'initial_state': -0.2717772424221039, 'diff_eval': 8015.783273337173} step=10000
2025-12-06 14:23.46 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_10000.d3


Epoch 11/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.13it/s, critic_loss=2.11, actor_loss=-4.04e+3, temp=0, temp_loss=0]


2025-12-06 14:24.29 [info     ] AWAC_20251206141631: epoch=11 step=11000 epoch=11 metrics={'time_sample_batch': 0.019596131563186646, 'time_algorithm_update': 0.01921861457824707, 'critic_loss': 2.1098688987493515, 'actor_loss': -4038.172138671875, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03909625172615051, 'td_error': 508.9619969077554, 'value_scale': 0.4626473250901537, 'discounted_advantage': -159.00085273091574, 'initial_state': -0.6548604369163513, 'diff_eval': 8015.783272927884} step=11000
2025-12-06 14:24.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_11000.d3


Epoch 12/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.93it/s, critic_loss=2, actor_loss=-4.26e+3, temp=0, temp_loss=0]  


2025-12-06 14:25.13 [info     ] AWAC_20251206141631: epoch=12 step=12000 epoch=12 metrics={'time_sample_batch': 0.01973859190940857, 'time_algorithm_update': 0.019429494619369506, 'critic_loss': 1.9968029042482376, 'actor_loss': -4258.0676015625, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03944711923599243, 'td_error': 523.8567440997992, 'value_scale': 0.5203952327412018, 'discounted_advantage': -160.88272829662728, 'initial_state': -0.46469950675964355, 'diff_eval': 8015.783269448932} step=12000
2025-12-06 14:25.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_12000.d3


Epoch 13/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.30it/s, critic_loss=1.9, actor_loss=-4.44e+3, temp=0, temp_loss=0]


2025-12-06 14:25.56 [info     ] AWAC_20251206141631: epoch=13 step=13000 epoch=13 metrics={'time_sample_batch': 0.019526794672012328, 'time_algorithm_update': 0.01906191349029541, 'critic_loss': 1.9021567548513412, 'actor_loss': -4437.450219726563, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.0388555006980896, 'td_error': 534.4199335860354, 'value_scale': 0.6387847022404027, 'discounted_advantage': -162.73869775829638, 'initial_state': -0.6117880940437317, 'diff_eval': 8015.783207748682} step=13000
2025-12-06 14:25.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_13000.d3


Epoch 14/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.71it/s, critic_loss=1.85, actor_loss=-4.58e+3, temp=0, temp_loss=0]


2025-12-06 14:26.40 [info     ] AWAC_20251206141631: epoch=14 step=14000 epoch=14 metrics={'time_sample_batch': 0.020102097272872925, 'time_algorithm_update': 0.01939571714401245, 'critic_loss': 1.847634994983673, 'actor_loss': -4582.39923828125, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03978020048141479, 'td_error': 544.2468394503446, 'value_scale': 0.8130072407418205, 'discounted_advantage': -164.29647702439448, 'initial_state': -0.7611425518989563, 'diff_eval': 8015.782907171001} step=14000
2025-12-06 14:26.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_14000.d3


Epoch 15/200: 100%|██████████| 1000/1000 [00:38<00:00, 25.87it/s, critic_loss=1.85, actor_loss=-4.18e+3, temp=0, temp_loss=0]


2025-12-06 14:27.22 [info     ] AWAC_20251206141631: epoch=15 step=15000 epoch=15 metrics={'time_sample_batch': 0.018987359285354614, 'time_algorithm_update': 0.0187201726436615, 'critic_loss': 1.852782679438591, 'actor_loss': -4183.733610595703, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03797696232795715, 'td_error': 691.2283751571401, 'value_scale': 0.49297579012698767, 'discounted_advantage': -182.12937804344946, 'initial_state': -1.5997298955917358, 'diff_eval': 8015.800187986235} step=15000
2025-12-06 14:27.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_15000.d3


Epoch 16/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.12it/s, critic_loss=2.06, actor_loss=-4.75e+3, temp=0, temp_loss=0]


2025-12-06 14:28.05 [info     ] AWAC_20251206141631: epoch=16 step=16000 epoch=16 metrics={'time_sample_batch': 0.019592763423919676, 'time_algorithm_update': 0.01926253080368042, 'critic_loss': 2.0568023463487624, 'actor_loss': -4750.971291992188, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.039133026361465456, 'td_error': 685.722973832677, 'value_scale': 0.40211046708252807, 'discounted_advantage': -182.0371481708668, 'initial_state': -2.020235300064087, 'diff_eval': 8015.800187986235} step=16000
2025-12-06 14:28.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_16000.d3


Epoch 17/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.36it/s, critic_loss=2.11, actor_loss=-4.76e+3, temp=0, temp_loss=0]


2025-12-06 14:28.48 [info     ] AWAC_20251206141631: epoch=17 step=17000 epoch=17 metrics={'time_sample_batch': 0.019459097385406493, 'time_algorithm_update': 0.01903016448020935, 'critic_loss': 2.1144774084091185, 'actor_loss': -4757.927174316406, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.038759190320968626, 'td_error': 680.8759625514275, 'value_scale': 0.29975690644184455, 'discounted_advantage': -181.4295769150426, 'initial_state': -1.7877174615859985, 'diff_eval': 8015.800187986235} step=17000
2025-12-06 14:28.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_17000.d3


Epoch 18/200: 100%|██████████| 1000/1000 [00:38<00:00, 25.67it/s, critic_loss=2.07, actor_loss=-4.77e+3, temp=0, temp_loss=0]


2025-12-06 14:29.30 [info     ] AWAC_20251206141631: epoch=18 step=18000 epoch=18 metrics={'time_sample_batch': 0.019319403171539306, 'time_algorithm_update': 0.018698044538497924, 'critic_loss': 2.0693285259008407, 'actor_loss': -4768.782818359375, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03829306888580322, 'td_error': 684.0241492040227, 'value_scale': 0.3373744923587067, 'discounted_advantage': -181.58371344849323, 'initial_state': -1.6247262954711914, 'diff_eval': 8015.800187986235} step=18000
2025-12-06 14:29.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_18000.d3


Epoch 19/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.72it/s, critic_loss=2.01, actor_loss=-4.79e+3, temp=0, temp_loss=0]


2025-12-06 14:30.16 [info     ] AWAC_20251206141631: epoch=19 step=19000 epoch=19 metrics={'time_sample_batch': 0.020651877641677856, 'time_algorithm_update': 0.020508506298065186, 'critic_loss': 2.01484336745739, 'actor_loss': -4785.716099121094, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.041454405784606935, 'td_error': 711.5859307518607, 'value_scale': 0.8527339725487391, 'discounted_advantage': -184.60138895528405, 'initial_state': -0.860179603099823, 'diff_eval': 8015.800185530504} step=19000
2025-12-06 14:30.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_19000.d3


Epoch 20/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.22it/s, critic_loss=2, actor_loss=-4.81e+3, temp=0, temp_loss=0]  


2025-12-06 14:31.00 [info     ] AWAC_20251206141631: epoch=20 step=20000 epoch=20 metrics={'time_sample_batch': 0.01965713858604431, 'time_algorithm_update': 0.019047691822052, 'critic_loss': 1.99656707572937, 'actor_loss': -4810.4745234375, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.038974152088165286, 'td_error': 710.096365289512, 'value_scale': 0.7578232153697441, 'discounted_advantage': -185.02993688056392, 'initial_state': -1.0348552465438843, 'diff_eval': 8015.800185530504} step=20000
2025-12-06 14:31.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_20000.d3


Epoch 21/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.84it/s, critic_loss=1.99, actor_loss=-4.84e+3, temp=0, temp_loss=0]


2025-12-06 14:31.43 [info     ] AWAC_20251206141631: epoch=21 step=21000 epoch=21 metrics={'time_sample_batch': 0.019823401927947997, 'time_algorithm_update': 0.019409562349319457, 'critic_loss': 1.9909610823392867, 'actor_loss': -4844.8522768554685, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03953403663635254, 'td_error': 693.3509318378011, 'value_scale': 0.46832753465903576, 'discounted_advantage': -183.13250354814684, 'initial_state': -0.9685330390930176, 'diff_eval': 8015.800183074772} step=21000
2025-12-06 14:31.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_21000.d3


Epoch 22/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.25it/s, critic_loss=1.98, actor_loss=-4.89e+3, temp=0, temp_loss=0]


2025-12-06 14:32.26 [info     ] AWAC_20251206141631: epoch=22 step=22000 epoch=22 metrics={'time_sample_batch': 0.019571329832077027, 'time_algorithm_update': 0.01903567314147949, 'critic_loss': 1.9759380111694336, 'actor_loss': -4887.468637695312, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.038887006044387815, 'td_error': 693.9986826066241, 'value_scale': 0.47748964670764504, 'discounted_advantage': -183.1865398118822, 'initial_state': -0.9333488941192627, 'diff_eval': 8015.8001806190405} step=22000
2025-12-06 14:32.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_22000.d3


Epoch 23/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.54it/s, critic_loss=1.97, actor_loss=-4.93e+3, temp=0, temp_loss=0]


2025-12-06 14:33.09 [info     ] AWAC_20251206141631: epoch=23 step=23000 epoch=23 metrics={'time_sample_batch': 0.019278261184692384, 'time_algorithm_update': 0.018948142290115357, 'critic_loss': 1.9690553146600722, 'actor_loss': -4934.239643066407, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03848572659492493, 'td_error': 689.1576022217868, 'value_scale': 0.4030594947386442, 'discounted_advantage': -182.5962849262057, 'initial_state': -0.7331846356391907, 'diff_eval': 8015.8001806190405} step=23000
2025-12-06 14:33.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_23000.d3


Epoch 24/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.02it/s, critic_loss=1.94, actor_loss=-4.98e+3, temp=0, temp_loss=0]


2025-12-06 14:33.53 [info     ] AWAC_20251206141631: epoch=24 step=24000 epoch=24 metrics={'time_sample_batch': 0.019740572690963746, 'time_algorithm_update': 0.019244967460632326, 'critic_loss': 1.9365913743972778, 'actor_loss': -4980.095608398437, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03927052664756775, 'td_error': 701.3877573221657, 'value_scale': 0.5355926504232184, 'discounted_advantage': -184.20544767086562, 'initial_state': -0.729642927646637, 'diff_eval': 8015.800169158962} step=24000
2025-12-06 14:33.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_24000.d3


Epoch 25/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.41it/s, critic_loss=1.91, actor_loss=-5.02e+3, temp=0, temp_loss=0]


2025-12-06 14:34.35 [info     ] AWAC_20251206141631: epoch=25 step=25000 epoch=25 metrics={'time_sample_batch': 0.01945961093902588, 'time_algorithm_update': 0.018941412210464477, 'critic_loss': 1.9063213237524033, 'actor_loss': -5021.142155761719, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.0386820273399353, 'td_error': 716.2345070756506, 'value_scale': 0.8284959109626516, 'discounted_advantage': -186.31546973060642, 'initial_state': -0.08293510228395462, 'diff_eval': 8015.800169158962} step=25000
2025-12-06 14:34.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_25000.d3


Epoch 26/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.77it/s, critic_loss=1.86, actor_loss=-5.06e+3, temp=0, temp_loss=0]


2025-12-06 14:35.19 [info     ] AWAC_20251206141631: epoch=26 step=26000 epoch=26 metrics={'time_sample_batch': 0.019957682609558106, 'time_algorithm_update': 0.019388280391693116, 'critic_loss': 1.8585613077878953, 'actor_loss': -5056.116682128906, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03963401198387146, 'td_error': 697.6064860405227, 'value_scale': 0.6446301007349815, 'discounted_advantage': -184.05116438136147, 'initial_state': -0.07817325741052628, 'diff_eval': 8015.800162610344} step=26000
2025-12-06 14:35.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_26000.d3


Epoch 27/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.06it/s, critic_loss=1.85, actor_loss=-5.08e+3, temp=0, temp_loss=0]


2025-12-06 14:36.03 [info     ] AWAC_20251206141631: epoch=27 step=27000 epoch=27 metrics={'time_sample_batch': 0.019486138105392455, 'time_algorithm_update': 0.019418065309524536, 'critic_loss': 1.8457473258972168, 'actor_loss': -5084.67187109375, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03919219040870667, 'td_error': 699.3381328907126, 'value_scale': 0.7305023015399869, 'discounted_advantage': -184.40279781250766, 'initial_state': -0.01809980347752571, 'diff_eval': 8015.80014521558} step=27000
2025-12-06 14:36.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_27000.d3


Epoch 28/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.51it/s, critic_loss=1.85, actor_loss=-5.11e+3, temp=0, temp_loss=0]


2025-12-06 14:36.45 [info     ] AWAC_20251206141631: epoch=28 step=28000 epoch=28 metrics={'time_sample_batch': 0.019267826557159425, 'time_algorithm_update': 0.018972110509872436, 'critic_loss': 1.850088872551918, 'actor_loss': -5107.885915527344, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.038506393909454345, 'td_error': 700.0485663278636, 'value_scale': 0.7224757558004954, 'discounted_advantage': -184.23777175521136, 'initial_state': 0.17438706755638123, 'diff_eval': 8015.800081571209} step=28000
2025-12-06 14:36.45 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_28000.d3


Epoch 29/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.23it/s, critic_loss=1.81, actor_loss=-5.13e+3, temp=0, temp_loss=0]


2025-12-06 14:37.29 [info     ] AWAC_20251206141631: epoch=29 step=29000 epoch=29 metrics={'time_sample_batch': 0.019540232181549072, 'time_algorithm_update': 0.01914376211166382, 'critic_loss': 1.8132207136154175, 'actor_loss': -5126.706556152344, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03895041918754578, 'td_error': 707.378134956625, 'value_scale': 0.9324633604819572, 'discounted_advantage': -185.0056112415419, 'initial_state': 0.601292610168457, 'diff_eval': 8015.800029898529} step=29000
2025-12-06 14:37.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_29000.d3


Epoch 30/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.12it/s, critic_loss=1.81, actor_loss=-5.14e+3, temp=0, temp_loss=0]


2025-12-06 14:38.12 [info     ] AWAC_20251206141631: epoch=30 step=30000 epoch=30 metrics={'time_sample_batch': 0.01968700623512268, 'time_algorithm_update': 0.01916121530532837, 'critic_loss': 1.8063042359352113, 'actor_loss': -5141.745845214844, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03911681604385376, 'td_error': 690.0530544426875, 'value_scale': 0.6274978583538086, 'discounted_advantage': -182.87346352039825, 'initial_state': 0.01642579957842827, 'diff_eval': 8015.799938422537} step=30000
2025-12-06 14:38.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_30000.d3


Epoch 31/200: 100%|██████████| 1000/1000 [00:38<00:00, 25.77it/s, critic_loss=1.76, actor_loss=-5.15e+3, temp=0, temp_loss=0]


2025-12-06 14:38.54 [info     ] AWAC_20251206141631: epoch=31 step=31000 epoch=31 metrics={'time_sample_batch': 0.019056859970092772, 'time_algorithm_update': 0.018779189109802245, 'critic_loss': 1.7562376894950866, 'actor_loss': -5153.681251464844, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03811276388168335, 'td_error': 697.2419535956776, 'value_scale': 0.9859061878969251, 'discounted_advantage': -183.71289465910618, 'initial_state': 0.385833203792572, 'diff_eval': 8015.799693821462} step=31000
2025-12-06 14:38.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_31000.d3


Epoch 32/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.90it/s, critic_loss=2.89, actor_loss=2.39e+5, temp=0, temp_loss=0]


2025-12-06 14:39.38 [info     ] AWAC_20251206141631: epoch=32 step=32000 epoch=32 metrics={'time_sample_batch': 0.01988548755645752, 'time_algorithm_update': 0.019331819772720336, 'critic_loss': 2.8813294533491134, 'actor_loss': 237118.18534570312, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03947975778579712, 'td_error': 996.5565793654372, 'value_scale': 0.0972967446779353, 'discounted_advantage': -216.62171282732166, 'initial_state': -1.3313462734222412, 'diff_eval': 8015.80050293378} step=32000
2025-12-06 14:39.38 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_32000.d3


Epoch 33/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.14it/s, critic_loss=1.92, actor_loss=-5.17e+3, temp=0, temp_loss=0]


2025-12-06 14:40.21 [info     ] AWAC_20251206141631: epoch=33 step=33000 epoch=33 metrics={'time_sample_batch': 0.019479565858840944, 'time_algorithm_update': 0.0193190598487854, 'critic_loss': 1.918286502122879, 'actor_loss': -5166.758174316406, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03908879947662353, 'td_error': 1001.746425773764, 'value_scale': 0.2813671296021346, 'discounted_advantage': -217.3538941997145, 'initial_state': -1.7027398347854614, 'diff_eval': 8015.80050293378} step=33000
2025-12-06 14:40.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_33000.d3


Epoch 34/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.07it/s, critic_loss=1.9, actor_loss=-5.17e+3, temp=0, temp_loss=0]


2025-12-06 14:41.04 [info     ] AWAC_20251206141631: epoch=34 step=34000 epoch=34 metrics={'time_sample_batch': 0.019729029178619385, 'time_algorithm_update': 0.019208388566970824, 'critic_loss': 1.903905034184456, 'actor_loss': -5166.75753515625, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.039218611001968384, 'td_error': 1019.9259205626294, 'value_scale': 0.5595808142198176, 'discounted_advantage': -218.87793420223426, 'initial_state': -1.3328464031219482, 'diff_eval': 8015.80050293378} step=34000
2025-12-06 14:41.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_34000.d3


Epoch 35/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.17it/s, critic_loss=1.98, actor_loss=-5.17e+3, temp=0, temp_loss=0]


2025-12-06 14:41.48 [info     ] AWAC_20251206141631: epoch=35 step=35000 epoch=35 metrics={'time_sample_batch': 0.019573519229888914, 'time_algorithm_update': 0.019201061010360718, 'critic_loss': 1.9839643793106079, 'actor_loss': -5166.749831054687, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.039043216705322265, 'td_error': 1005.4971380670299, 'value_scale': 0.38435462332343917, 'discounted_advantage': -217.11413273025101, 'initial_state': -1.4225918054580688, 'diff_eval': 8015.80050293378} step=35000
2025-12-06 14:41.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_35000.d3


Epoch 36/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.30it/s, critic_loss=2.04, actor_loss=-5.17e+3, temp=0, temp_loss=0]


2025-12-06 14:42.31 [info     ] AWAC_20251206141631: epoch=36 step=36000 epoch=36 metrics={'time_sample_batch': 0.01956901741027832, 'time_algorithm_update': 0.019029566764831543, 'critic_loss': 2.0380421034097673, 'actor_loss': -5166.7495947265625, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.038864991664886474, 'td_error': 1038.1857931370084, 'value_scale': 0.8469316206485258, 'discounted_advantage': -220.65369452125725, 'initial_state': -0.8693814873695374, 'diff_eval': 8015.80050293378} step=36000
2025-12-06 14:42.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_36000.d3


Epoch 37/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.17it/s, critic_loss=2.06, actor_loss=-5.17e+3, temp=0, temp_loss=0]


2025-12-06 14:43.14 [info     ] AWAC_20251206141631: epoch=37 step=37000 epoch=37 metrics={'time_sample_batch': 0.01964950442314148, 'time_algorithm_update': 0.019059993982315063, 'critic_loss': 2.0606714372634887, 'actor_loss': -5166.76115625, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.039010612964630126, 'td_error': 1003.2071371273472, 'value_scale': 0.463457115208816, 'discounted_advantage': -217.02769761989904, 'initial_state': -1.3615126609802246, 'diff_eval': 8015.80050293378} step=37000
2025-12-06 14:43.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_37000.d3


Epoch 38/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.21it/s, critic_loss=2.03, actor_loss=-5.17e+3, temp=0, temp_loss=0]


2025-12-06 14:43.57 [info     ] AWAC_20251206141631: epoch=38 step=38000 epoch=38 metrics={'time_sample_batch': 0.019746743440628052, 'time_algorithm_update': 0.018965842723846434, 'critic_loss': 2.033870993733406, 'actor_loss': -5166.782656738281, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03898628735542297, 'td_error': 1027.1095266981215, 'value_scale': 0.7548434590110485, 'discounted_advantage': -219.34761250411495, 'initial_state': -1.0834945440292358, 'diff_eval': 8015.80050293378} step=38000
2025-12-06 14:43.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_38000.d3


Epoch 39/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.15it/s, critic_loss=2.02, actor_loss=-5.17e+3, temp=0, temp_loss=0]


2025-12-06 14:44.40 [info     ] AWAC_20251206141631: epoch=39 step=39000 epoch=39 metrics={'time_sample_batch': 0.019700183153152466, 'time_algorithm_update': 0.019115174770355226, 'critic_loss': 2.0199706498384478, 'actor_loss': -5166.810817382812, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03909212303161621, 'td_error': 984.1049158713721, 'value_scale': 0.10961650093480295, 'discounted_advantage': -214.91882377325018, 'initial_state': -1.794779658317566, 'diff_eval': 8015.80050293378} step=39000
2025-12-06 14:44.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_39000.d3


Epoch 40/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.22it/s, critic_loss=2.02, actor_loss=-5.17e+3, temp=0, temp_loss=0]


2025-12-06 14:45.23 [info     ] AWAC_20251206141631: epoch=40 step=40000 epoch=40 metrics={'time_sample_batch': 0.019558356046676637, 'time_algorithm_update': 0.01914575147628784, 'critic_loss': 2.017324384689331, 'actor_loss': -5166.858334960937, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03897811532020569, 'td_error': 1027.582642903839, 'value_scale': 0.7132179574964432, 'discounted_advantage': -219.46706773410662, 'initial_state': -1.1987025737762451, 'diff_eval': 8015.80050293378} step=40000
2025-12-06 14:45.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_40000.d3


Epoch 41/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.56it/s, critic_loss=2.02, actor_loss=-5.17e+3, temp=0, temp_loss=0]


2025-12-06 14:46.08 [info     ] AWAC_20251206141631: epoch=41 step=41000 epoch=41 metrics={'time_sample_batch': 0.02034132266044617, 'time_algorithm_update': 0.01935179543495178, 'critic_loss': 2.0142916264534, 'actor_loss': -5166.93941015625, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03999364757537842, 'td_error': 1008.3856597572691, 'value_scale': 0.5903163332525682, 'discounted_advantage': -218.09363765780827, 'initial_state': -1.2861297130584717, 'diff_eval': 8015.80050293378} step=41000
2025-12-06 14:46.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_41000.d3


Epoch 42/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.40it/s, critic_loss=1.98, actor_loss=-5.17e+3, temp=0, temp_loss=0]


2025-12-06 14:46.52 [info     ] AWAC_20251206141631: epoch=42 step=42000 epoch=42 metrics={'time_sample_batch': 0.0207436363697052, 'time_algorithm_update': 0.019303653240203857, 'critic_loss': 1.9761489075422287, 'actor_loss': -5167.0635595703125, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.040326188564300536, 'td_error': 1007.275080517836, 'value_scale': 0.48501378215134316, 'discounted_advantage': -217.75788633299197, 'initial_state': -1.369848370552063, 'diff_eval': 8015.80050293378} step=42000
2025-12-06 14:46.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_42000.d3


Epoch 43/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.43it/s, critic_loss=1.96, actor_loss=-5.17e+3, temp=0, temp_loss=0]


2025-12-06 14:47.35 [info     ] AWAC_20251206141631: epoch=43 step=43000 epoch=43 metrics={'time_sample_batch': 0.01944969058036804, 'time_algorithm_update': 0.01892413353919983, 'critic_loss': 1.9652198481559753, 'actor_loss': -5167.2719965820315, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03864184737205505, 'td_error': 1004.488601189134, 'value_scale': 0.3212434620173353, 'discounted_advantage': -217.05212513589052, 'initial_state': -1.3753172159194946, 'diff_eval': 8015.80050293378} step=43000
2025-12-06 14:47.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_43000.d3


Epoch 44/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.09it/s, critic_loss=1.97, actor_loss=-5.17e+3, temp=0, temp_loss=0]


2025-12-06 14:48.20 [info     ] AWAC_20251206141631: epoch=44 step=44000 epoch=44 metrics={'time_sample_batch': 0.021315333127975462, 'time_algorithm_update': 0.019224512577056884, 'critic_loss': 1.9740023090839387, 'actor_loss': -5167.609436523438, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.04081187343597412, 'td_error': 1008.5050842423559, 'value_scale': 0.5077194274943369, 'discounted_advantage': -217.99779702608382, 'initial_state': -1.3925737142562866, 'diff_eval': 8015.80050293378} step=44000
2025-12-06 14:48.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_44000.d3


Epoch 45/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.32it/s, critic_loss=1.96, actor_loss=-5.17e+3, temp=0, temp_loss=0]


2025-12-06 14:49.03 [info     ] AWAC_20251206141631: epoch=45 step=45000 epoch=45 metrics={'time_sample_batch': 0.01941232466697693, 'time_algorithm_update': 0.01911714768409729, 'critic_loss': 1.9653704317808152, 'actor_loss': -5168.148281738281, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.038814319133758546, 'td_error': 992.5670506028753, 'value_scale': 0.31548730548305715, 'discounted_advantage': -215.94974515245144, 'initial_state': -1.318994164466858, 'diff_eval': 8015.80050293378} step=45000
2025-12-06 14:49.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_45000.d3


Epoch 46/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.26it/s, critic_loss=1.96, actor_loss=-5.17e+3, temp=0, temp_loss=0]


2025-12-06 14:49.46 [info     ] AWAC_20251206141631: epoch=46 step=46000 epoch=46 metrics={'time_sample_batch': 0.019644610166549684, 'time_algorithm_update': 0.01900330638885498, 'critic_loss': 1.9645228598117828, 'actor_loss': -5169.003609863281, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03892359352111816, 'td_error': 1037.4672939214765, 'value_scale': 1.0686964622331034, 'discounted_advantage': -220.98681483779242, 'initial_state': -0.41494742035865784, 'diff_eval': 8015.80050293378} step=46000
2025-12-06 14:49.46 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_46000.d3


Epoch 47/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.36it/s, critic_loss=1.96, actor_loss=-5.17e+3, temp=0, temp_loss=0]


2025-12-06 14:50.29 [info     ] AWAC_20251206141631: epoch=47 step=47000 epoch=47 metrics={'time_sample_batch': 0.019454383611679078, 'time_algorithm_update': 0.019042189359664917, 'critic_loss': 1.9567399489879609, 'actor_loss': -5170.318913085937, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03876088619232178, 'td_error': 1009.3132393990188, 'value_scale': 0.6518785440345816, 'discounted_advantage': -217.97427517894639, 'initial_state': -0.5245113372802734, 'diff_eval': 8015.80050293378} step=47000
2025-12-06 14:50.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_47000.d3


Epoch 48/200: 100%|██████████| 1000/1000 [00:38<00:00, 25.79it/s, critic_loss=1.97, actor_loss=-5.17e+3, temp=0, temp_loss=0]


2025-12-06 14:51.11 [info     ] AWAC_20251206141631: epoch=48 step=48000 epoch=48 metrics={'time_sample_batch': 0.019091885566711427, 'time_algorithm_update': 0.01873539185523987, 'critic_loss': 1.9675655326843262, 'actor_loss': -5172.241946777343, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.038095083951950076, 'td_error': 997.4882945159989, 'value_scale': 0.39665618408565023, 'discounted_advantage': -216.52865336644194, 'initial_state': -1.0126092433929443, 'diff_eval': 8015.80050293378} step=48000
2025-12-06 14:51.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_48000.d3


Epoch 49/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.44it/s, critic_loss=1.97, actor_loss=-5.17e+3, temp=0, temp_loss=0]


2025-12-06 14:51.54 [info     ] AWAC_20251206141631: epoch=49 step=49000 epoch=49 metrics={'time_sample_batch': 0.019283774375915528, 'time_algorithm_update': 0.019022708177566527, 'critic_loss': 1.967310769200325, 'actor_loss': -5174.895255859375, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03858611011505127, 'td_error': 1002.8216696329088, 'value_scale': 0.5602301447297082, 'discounted_advantage': -217.24730998282985, 'initial_state': -0.35257667303085327, 'diff_eval': 8015.80050293378} step=49000
2025-12-06 14:51.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_49000.d3


Epoch 50/200: 100%|██████████| 1000/1000 [00:38<00:00, 25.68it/s, critic_loss=1.97, actor_loss=-5.18e+3, temp=0, temp_loss=0]


2025-12-06 14:52.37 [info     ] AWAC_20251206141631: epoch=50 step=50000 epoch=50 metrics={'time_sample_batch': 0.01927506923675537, 'time_algorithm_update': 0.018688817024230957, 'critic_loss': 1.9669953075647355, 'actor_loss': -5178.176782714844, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03824020028114319, 'td_error': 1006.6743167853849, 'value_scale': 0.6392861412438405, 'discounted_advantage': -217.93506634075138, 'initial_state': -0.5284594893455505, 'diff_eval': 8015.80050293378} step=50000
2025-12-06 14:52.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_50000.d3


Epoch 51/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.64it/s, critic_loss=1.95, actor_loss=-5.18e+3, temp=0, temp_loss=0]


2025-12-06 14:53.19 [info     ] AWAC_20251206141631: epoch=51 step=51000 epoch=51 metrics={'time_sample_batch': 0.01916208076477051, 'time_algorithm_update': 0.018851304054260255, 'critic_loss': 1.9464149731397629, 'actor_loss': -5181.81319921875, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03829883074760437, 'td_error': 1009.296119552845, 'value_scale': 0.6642085132177385, 'discounted_advantage': -218.34393186802018, 'initial_state': -0.38597506284713745, 'diff_eval': 8015.80050293378} step=51000
2025-12-06 14:53.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_51000.d3


Epoch 52/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.19it/s, critic_loss=1.92, actor_loss=-5.19e+3, temp=0, temp_loss=0]


2025-12-06 14:54.02 [info     ] AWAC_20251206141631: epoch=52 step=52000 epoch=52 metrics={'time_sample_batch': 0.019606953144073486, 'time_algorithm_update': 0.01905905055999756, 'critic_loss': 1.9244721937179565, 'actor_loss': -5185.400870605468, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.038958139419555664, 'td_error': 994.9896846565028, 'value_scale': 0.6166230531164257, 'discounted_advantage': -217.17054098983115, 'initial_state': -0.0571989044547081, 'diff_eval': 8015.80050293378} step=52000
2025-12-06 14:54.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_52000.d3


Epoch 53/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.42it/s, critic_loss=1.9, actor_loss=-5.19e+3, temp=0, temp_loss=0]


2025-12-06 14:54.45 [info     ] AWAC_20251206141631: epoch=53 step=53000 epoch=53 metrics={'time_sample_batch': 0.019613468408584595, 'time_algorithm_update': 0.018766143798828124, 'critic_loss': 1.9044019352197648, 'actor_loss': -5188.646224609375, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.038655306339263916, 'td_error': 1020.231841868902, 'value_scale': 0.7968415623750891, 'discounted_advantage': -219.36898677517638, 'initial_state': 0.04258985444903374, 'diff_eval': 8015.80050293378} step=53000
2025-12-06 14:54.46 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_53000.d3


Epoch 54/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.28it/s, critic_loss=1.86, actor_loss=-5.19e+3, temp=0, temp_loss=0]


2025-12-06 14:55.30 [info     ] AWAC_20251206141631: epoch=54 step=54000 epoch=54 metrics={'time_sample_batch': 0.01993515205383301, 'time_algorithm_update': 0.020260774850845337, 'critic_loss': 1.8613063215017318, 'actor_loss': -5191.40602734375, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.040485477447509764, 'td_error': 991.6720653744403, 'value_scale': 0.5118771947620551, 'discounted_advantage': -216.21631711191569, 'initial_state': 0.20660322904586792, 'diff_eval': 8015.80050293378} step=54000
2025-12-06 14:55.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_54000.d3


Epoch 55/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.97it/s, critic_loss=1.86, actor_loss=-5.19e+3, temp=0, temp_loss=0]


2025-12-06 14:56.15 [info     ] AWAC_20251206141631: epoch=55 step=55000 epoch=55 metrics={'time_sample_batch': 0.021205421924591066, 'time_algorithm_update': 0.019513962984085084, 'critic_loss': 1.8625160450935363, 'actor_loss': -5193.685544433593, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.04101213002204895, 'td_error': 995.3581215012086, 'value_scale': 0.5392901865462558, 'discounted_advantage': -216.76520556971485, 'initial_state': 0.28343942761421204, 'diff_eval': 8015.80050293378} step=55000
2025-12-06 14:56.15 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_55000.d3


Epoch 56/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.02it/s, critic_loss=1.86, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 14:57.00 [info     ] AWAC_20251206141631: epoch=56 step=56000 epoch=56 metrics={'time_sample_batch': 0.02053043818473816, 'time_algorithm_update': 0.019289193630218505, 'critic_loss': 1.8628201793432235, 'actor_loss': -5195.530482421875, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.04011931347846985, 'td_error': 989.2034185670235, 'value_scale': 0.4249370389603147, 'discounted_advantage': -215.7287004138946, 'initial_state': 0.06541101634502411, 'diff_eval': 8015.80050293378} step=56000
2025-12-06 14:57.01 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_56000.d3


Epoch 57/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.29it/s, critic_loss=1.86, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 14:57.43 [info     ] AWAC_20251206141631: epoch=57 step=57000 epoch=57 metrics={'time_sample_batch': 0.019354346752166747, 'time_algorithm_update': 0.019225038051605223, 'critic_loss': 1.8572124087810515, 'actor_loss': -5197.0208061523435, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03885756826400757, 'td_error': 1024.9603173125033, 'value_scale': 0.9249854499485097, 'discounted_advantage': -219.77954360760336, 'initial_state': 0.6275285482406616, 'diff_eval': 8015.80050293378} step=57000
2025-12-06 14:57.44 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_57000.d3


Epoch 58/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.07it/s, critic_loss=1.87, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 14:58.27 [info     ] AWAC_20251206141631: epoch=58 step=58000 epoch=58 metrics={'time_sample_batch': 0.019599321842193602, 'time_algorithm_update': 0.01931633186340332, 'critic_loss': 1.8652128524780274, 'actor_loss': -5198.214034667969, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03919353342056275, 'td_error': 1007.7079857664057, 'value_scale': 0.6339437356917382, 'discounted_advantage': -217.91298701335867, 'initial_state': 0.7058947682380676, 'diff_eval': 8015.80050293378} step=58000
2025-12-06 14:58.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_58000.d3


Epoch 59/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.81it/s, critic_loss=1.85, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 14:59.11 [info     ] AWAC_20251206141631: epoch=59 step=59000 epoch=59 metrics={'time_sample_batch': 0.01989127540588379, 'time_algorithm_update': 0.01943874192237854, 'critic_loss': 1.845577737212181, 'actor_loss': -5199.164742675781, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.039614817142486575, 'td_error': 983.9545505727056, 'value_scale': 0.42121797313403153, 'discounted_advantage': -215.36814432696832, 'initial_state': 0.25651922821998596, 'diff_eval': 8015.80050293378} step=59000
2025-12-06 14:59.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_59000.d3


Epoch 60/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.02it/s, critic_loss=1.83, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 14:59.54 [info     ] AWAC_20251206141631: epoch=60 step=60000 epoch=60 metrics={'time_sample_batch': 0.019669523000717164, 'time_algorithm_update': 0.01930872130393982, 'critic_loss': 1.82821199631691, 'actor_loss': -5199.937616699219, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.039258330583572386, 'td_error': 1022.1340077330904, 'value_scale': 0.783679439277175, 'discounted_advantage': -219.23582804215525, 'initial_state': 0.7834945917129517, 'diff_eval': 8015.80050293378} step=60000
2025-12-06 14:59.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_60000.d3


Epoch 61/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.76it/s, critic_loss=1.83, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:00.38 [info     ] AWAC_20251206141631: epoch=61 step=61000 epoch=61 metrics={'time_sample_batch': 0.020054468154907225, 'time_algorithm_update': 0.0193607759475708, 'critic_loss': 1.829972200870514, 'actor_loss': -5200.554421386719, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.039697694301605226, 'td_error': 1003.1678607287822, 'value_scale': 0.6360752373607478, 'discounted_advantage': -217.4983058774902, 'initial_state': 0.6300186514854431, 'diff_eval': 8015.80050293378} step=61000
2025-12-06 15:00.38 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_61000.d3


Epoch 62/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.26it/s, critic_loss=1.81, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:01.21 [info     ] AWAC_20251206141631: epoch=62 step=62000 epoch=62 metrics={'time_sample_batch': 0.019532318353652954, 'time_algorithm_update': 0.019101685285568236, 'critic_loss': 1.8105610867738724, 'actor_loss': -5201.044506347656, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03890913057327271, 'td_error': 993.1476117818693, 'value_scale': 0.5443444520505168, 'discounted_advantage': -216.16262185910702, 'initial_state': 0.5363160967826843, 'diff_eval': 8015.80050293378} step=62000
2025-12-06 15:01.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_62000.d3


Epoch 63/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.25it/s, critic_loss=1.79, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:02.04 [info     ] AWAC_20251206141631: epoch=63 step=63000 epoch=63 metrics={'time_sample_batch': 0.019571113109588624, 'time_algorithm_update': 0.01904222822189331, 'critic_loss': 1.7865548754930496, 'actor_loss': -5201.437835449219, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.0388891544342041, 'td_error': 1031.3791393801964, 'value_scale': 0.9902452954699262, 'discounted_advantage': -220.20941591530914, 'initial_state': 1.2872064113616943, 'diff_eval': 8015.80050293378} step=63000
2025-12-06 15:02.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_63000.d3


Epoch 64/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.01it/s, critic_loss=1.77, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:02.48 [info     ] AWAC_20251206141631: epoch=64 step=64000 epoch=64 metrics={'time_sample_batch': 0.019801807165145874, 'time_algorithm_update': 0.019205710887908935, 'critic_loss': 1.7732302172183991, 'actor_loss': -5201.753114746094, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03928904104232788, 'td_error': 1005.4386920655072, 'value_scale': 0.6753223888210683, 'discounted_advantage': -217.85111218153202, 'initial_state': 1.0128363370895386, 'diff_eval': 8015.80050293378} step=64000
2025-12-06 15:02.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_64000.d3


Epoch 65/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.18it/s, critic_loss=1.77, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:03.31 [info     ] AWAC_20251206141631: epoch=65 step=65000 epoch=65 metrics={'time_sample_batch': 0.019663445711135864, 'time_algorithm_update': 0.01909448504447937, 'critic_loss': 1.7652045780420302, 'actor_loss': -5202.001921386719, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03902847743034363, 'td_error': 1014.4339430895669, 'value_scale': 0.8755140455965849, 'discounted_advantage': -218.8431640804403, 'initial_state': 1.0904017686843872, 'diff_eval': 8015.80050293378} step=65000
2025-12-06 15:03.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_65000.d3


Epoch 66/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.93it/s, critic_loss=1.78, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:04.14 [info     ] AWAC_20251206141631: epoch=66 step=66000 epoch=66 metrics={'time_sample_batch': 0.019884531259536743, 'time_algorithm_update': 0.019220545768737794, 'critic_loss': 1.7752841812372209, 'actor_loss': -5202.2031015625, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03939541459083557, 'td_error': 1031.0146566703095, 'value_scale': 1.0984142976239235, 'discounted_advantage': -220.1697426288187, 'initial_state': 1.3530246019363403, 'diff_eval': 8015.80050293378} step=66000
2025-12-06 15:04.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_66000.d3


Epoch 67/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.22it/s, critic_loss=1.77, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:04.57 [info     ] AWAC_20251206141631: epoch=67 step=67000 epoch=67 metrics={'time_sample_batch': 0.01949292230606079, 'time_algorithm_update': 0.019223543643951416, 'critic_loss': 1.7749216610193252, 'actor_loss': -5202.362953125, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03897801232337952, 'td_error': 1011.6306445734213, 'value_scale': 0.891229766524457, 'discounted_advantage': -218.07583290828538, 'initial_state': 1.2859587669372559, 'diff_eval': 8015.80050293378} step=67000
2025-12-06 15:04.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_67000.d3


Epoch 68/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.12it/s, critic_loss=1.79, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:05.42 [info     ] AWAC_20251206141631: epoch=68 step=68000 epoch=68 metrics={'time_sample_batch': 0.02073536777496338, 'time_algorithm_update': 0.019700107574462892, 'critic_loss': 1.78987155854702, 'actor_loss': -5202.491079589844, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.040729466438293456, 'td_error': 1004.9241967440762, 'value_scale': 0.7101012115848422, 'discounted_advantage': -218.1429342416341, 'initial_state': 1.1243175268173218, 'diff_eval': 8015.80050293378} step=68000
2025-12-06 15:05.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_68000.d3


Epoch 69/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.17it/s, critic_loss=1.8, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:06.26 [info     ] AWAC_20251206141631: epoch=69 step=69000 epoch=69 metrics={'time_sample_batch': 0.019677118062973023, 'time_algorithm_update': 0.019067445278167724, 'critic_loss': 1.8040894639492036, 'actor_loss': -5202.593265625, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.039036425590515136, 'td_error': 1017.8795527217427, 'value_scale': 0.8863950745707685, 'discounted_advantage': -219.03591696254168, 'initial_state': 1.5308010578155518, 'diff_eval': 8015.80050293378} step=69000
2025-12-06 15:06.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_69000.d3


Epoch 70/200: 100%|██████████| 1000/1000 [00:38<00:00, 25.83it/s, critic_loss=1.82, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:07.08 [info     ] AWAC_20251206141631: epoch=70 step=70000 epoch=70 metrics={'time_sample_batch': 0.0191199688911438, 'time_algorithm_update': 0.018656049489974975, 'critic_loss': 1.815851757287979, 'actor_loss': -5202.673980957031, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03804658555984497, 'td_error': 997.6635199323939, 'value_scale': 0.6120336064061794, 'discounted_advantage': -217.05041528389918, 'initial_state': 1.1326770782470703, 'diff_eval': 8015.80050293378} step=70000
2025-12-06 15:07.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_70000.d3


Epoch 71/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.33it/s, critic_loss=1.83, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:07.51 [info     ] AWAC_20251206141631: epoch=71 step=71000 epoch=71 metrics={'time_sample_batch': 0.019562000513076782, 'time_algorithm_update': 0.0189591703414917, 'critic_loss': 1.8321813356876373, 'actor_loss': -5202.738969238281, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03879693961143494, 'td_error': 1029.346398174102, 'value_scale': 1.184413296419562, 'discounted_advantage': -220.5714800533454, 'initial_state': 1.9707510471343994, 'diff_eval': 8015.80050293378} step=71000
2025-12-06 15:07.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_71000.d3


Epoch 72/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.49it/s, critic_loss=1.83, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:08.34 [info     ] AWAC_20251206141631: epoch=72 step=72000 epoch=72 metrics={'time_sample_batch': 0.01927154302597046, 'time_algorithm_update': 0.018942575931549074, 'critic_loss': 1.8245943182706832, 'actor_loss': -5202.790825195312, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03850635361671448, 'td_error': 1000.849890597269, 'value_scale': 0.7878066240790341, 'discounted_advantage': -217.43318144445868, 'initial_state': 1.1920939683914185, 'diff_eval': 8015.80050293378} step=72000
2025-12-06 15:08.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_72000.d3


Epoch 73/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.28it/s, critic_loss=1.83, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:09.17 [info     ] AWAC_20251206141631: epoch=73 step=73000 epoch=73 metrics={'time_sample_batch': 0.019470940351486207, 'time_algorithm_update': 0.01913580060005188, 'critic_loss': 1.8258147166967391, 'actor_loss': -5202.833704101563, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03887439465522766, 'td_error': 1007.8278299022406, 'value_scale': 0.6951768590950806, 'discounted_advantage': -218.48059295678945, 'initial_state': 1.219314455986023, 'diff_eval': 8015.80050293378} step=73000
2025-12-06 15:09.17 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_73000.d3


Epoch 74/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.64it/s, critic_loss=1.83, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:10.01 [info     ] AWAC_20251206141631: epoch=74 step=74000 epoch=74 metrics={'time_sample_batch': 0.020188877582550047, 'time_algorithm_update': 0.019402861118316652, 'critic_loss': 1.8249856696128846, 'actor_loss': -5202.867501953125, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.0398844838142395, 'td_error': 1010.2142148545587, 'value_scale': 0.862498636523766, 'discounted_advantage': -218.479628327471, 'initial_state': 1.5795994997024536, 'diff_eval': 8015.80050293378} step=74000
2025-12-06 15:10.01 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_74000.d3


Epoch 75/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.92it/s, critic_loss=1.81, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:10.44 [info     ] AWAC_20251206141631: epoch=75 step=75000 epoch=75 metrics={'time_sample_batch': 0.01980216431617737, 'time_algorithm_update': 0.019297548532485963, 'critic_loss': 1.8074144675731658, 'actor_loss': -5202.893640625, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03939183306694031, 'td_error': 995.5402055474592, 'value_scale': 0.7911753805361263, 'discounted_advantage': -217.10642409310918, 'initial_state': 1.422995924949646, 'diff_eval': 8015.80050293378} step=75000
2025-12-06 15:10.44 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_75000.d3


Epoch 76/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.27it/s, critic_loss=1.83, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:11.27 [info     ] AWAC_20251206141631: epoch=76 step=76000 epoch=76 metrics={'time_sample_batch': 0.019622164011001586, 'time_algorithm_update': 0.018976675510406493, 'critic_loss': 1.8293602927923203, 'actor_loss': -5202.914631835937, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.038879210472106934, 'td_error': 998.3101509938501, 'value_scale': 0.728667830590905, 'discounted_advantage': -217.51742720732432, 'initial_state': 1.4972604513168335, 'diff_eval': 8015.80050293378} step=76000
2025-12-06 15:11.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_76000.d3


Epoch 77/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.97it/s, critic_loss=1.83, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:12.11 [info     ] AWAC_20251206141631: epoch=77 step=77000 epoch=77 metrics={'time_sample_batch': 0.019830706357955932, 'time_algorithm_update': 0.01926836133003235, 'critic_loss': 1.8345850553512573, 'actor_loss': -5202.930938476563, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03937694311141968, 'td_error': 1004.0870021438442, 'value_scale': 0.7752116241032525, 'discounted_advantage': -217.71725144890289, 'initial_state': 1.2893617153167725, 'diff_eval': 8015.80050293378} step=77000
2025-12-06 15:12.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_77000.d3


Epoch 78/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.03it/s, critic_loss=1.83, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:12.56 [info     ] AWAC_20251206141631: epoch=78 step=78000 epoch=78 metrics={'time_sample_batch': 0.02116828203201294, 'time_algorithm_update': 0.01945106530189514, 'critic_loss': 1.8301449627876283, 'actor_loss': -5202.944307128906, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.04091793727874756, 'td_error': 975.1558489779757, 'value_scale': 0.5759638140789153, 'discounted_advantage': -214.63851471066206, 'initial_state': 1.3256374597549438, 'diff_eval': 8015.80050293378} step=78000
2025-12-06 15:12.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_78000.d3


Epoch 79/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.15it/s, critic_loss=1.82, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:13.39 [info     ] AWAC_20251206141631: epoch=79 step=79000 epoch=79 metrics={'time_sample_batch': 0.019687402486801148, 'time_algorithm_update': 0.019135069131851198, 'critic_loss': 1.818966844677925, 'actor_loss': -5202.95258984375, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03908439922332764, 'td_error': 989.4342984765118, 'value_scale': 0.48533053704664697, 'discounted_advantage': -216.04668166677206, 'initial_state': 1.3615808486938477, 'diff_eval': 8015.80050293378} step=79000
2025-12-06 15:13.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_79000.d3


Epoch 80/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.98it/s, critic_loss=1.84, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:14.23 [info     ] AWAC_20251206141631: epoch=80 step=80000 epoch=80 metrics={'time_sample_batch': 0.019625035285949707, 'time_algorithm_update': 0.019249752044677736, 'critic_loss': 1.8387775554656982, 'actor_loss': -5202.96294140625, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03916076827049255, 'td_error': 1002.1318870356538, 'value_scale': 1.046729091451053, 'discounted_advantage': -217.28682580017684, 'initial_state': 2.0005083084106445, 'diff_eval': 8015.80050293378} step=80000
2025-12-06 15:14.23 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_80000.d3


Epoch 81/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.58it/s, critic_loss=1.88, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:15.05 [info     ] AWAC_20251206141631: epoch=81 step=81000 epoch=81 metrics={'time_sample_batch': 0.019280253887176513, 'time_algorithm_update': 0.018861279249191284, 'critic_loss': 1.8838588073253633, 'actor_loss': -5202.933943847656, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03840428113937378, 'td_error': 991.826734192955, 'value_scale': 0.7138663908245771, 'discounted_advantage': -216.93713477049045, 'initial_state': 1.5535157918930054, 'diff_eval': 8015.80050293378} step=81000
2025-12-06 15:15.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_81000.d3


Epoch 82/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.93it/s, critic_loss=1.95, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:15.49 [info     ] AWAC_20251206141631: epoch=82 step=82000 epoch=82 metrics={'time_sample_batch': 0.019987778663635256, 'time_algorithm_update': 0.019162017822265624, 'critic_loss': 1.9485799429416657, 'actor_loss': -5202.926229492187, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03944417810440064, 'td_error': 1009.5055548656334, 'value_scale': 1.0447131134382288, 'discounted_advantage': -218.749108108047, 'initial_state': 2.17006254196167, 'diff_eval': 8015.80050293378} step=82000
2025-12-06 15:15.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_82000.d3


Epoch 83/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.44it/s, critic_loss=2.04, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:16.32 [info     ] AWAC_20251206141631: epoch=83 step=83000 epoch=83 metrics={'time_sample_batch': 0.019057973861694336, 'time_algorithm_update': 0.019081700086593628, 'critic_loss': 2.0379750427007677, 'actor_loss': -5202.858307617187, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03863860297203064, 'td_error': 1031.331556769265, 'value_scale': 1.5445767873820082, 'discounted_advantage': -221.05042769439976, 'initial_state': 3.14399790763855, 'diff_eval': 8015.80050293378} step=83000
2025-12-06 15:16.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_83000.d3


Epoch 84/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.03it/s, critic_loss=2.06, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:17.15 [info     ] AWAC_20251206141631: epoch=84 step=84000 epoch=84 metrics={'time_sample_batch': 0.019766639709472657, 'time_algorithm_update': 0.01924402189254761, 'critic_loss': 2.056438101530075, 'actor_loss': -5202.850956542969, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03929173469543457, 'td_error': 1025.5347129028078, 'value_scale': 1.2432356686967567, 'discounted_advantage': -220.21950738284482, 'initial_state': 2.630807876586914, 'diff_eval': 8015.80050293378} step=84000
2025-12-06 15:17.15 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_84000.d3


Epoch 85/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.08it/s, critic_loss=2.01, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:17.58 [info     ] AWAC_20251206141631: epoch=85 step=85000 epoch=85 metrics={'time_sample_batch': 0.019747484922409058, 'time_algorithm_update': 0.019150299310684203, 'critic_loss': 2.0060256288051606, 'actor_loss': -5202.9412451171875, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03919494724273682, 'td_error': 1022.6391442864923, 'value_scale': 1.2071463650144005, 'discounted_advantage': -219.5894180768682, 'initial_state': 2.4320766925811768, 'diff_eval': 8015.80050293378} step=85000
2025-12-06 15:17.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_85000.d3


Epoch 86/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.55it/s, critic_loss=1.96, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:18.41 [info     ] AWAC_20251206141631: epoch=86 step=86000 epoch=86 metrics={'time_sample_batch': 0.019405128002166747, 'time_algorithm_update': 0.018768817663192747, 'critic_loss': 1.9581861834526062, 'actor_loss': -5202.928507324219, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03845187449455261, 'td_error': 1006.0534281129418, 'value_scale': 0.8505970709438047, 'discounted_advantage': -217.9503555487246, 'initial_state': 1.9851813316345215, 'diff_eval': 8015.80050293378} step=86000
2025-12-06 15:18.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_86000.d3


Epoch 87/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.56it/s, critic_loss=1.89, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:19.24 [info     ] AWAC_20251206141631: epoch=87 step=87000 epoch=87 metrics={'time_sample_batch': 0.0193297598361969, 'time_algorithm_update': 0.018868735313415527, 'critic_loss': 1.8893843414783478, 'actor_loss': -5202.970319335937, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03846259665489197, 'td_error': 1007.3045546695245, 'value_scale': 0.9850318241631807, 'discounted_advantage': -218.28949531098405, 'initial_state': 2.1898436546325684, 'diff_eval': 8015.80050293378} step=87000
2025-12-06 15:19.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_87000.d3


Epoch 88/200: 100%|██████████| 1000/1000 [00:38<00:00, 25.71it/s, critic_loss=1.85, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:20.06 [info     ] AWAC_20251206141631: epoch=88 step=88000 epoch=88 metrics={'time_sample_batch': 0.019113511085510255, 'time_algorithm_update': 0.01879085063934326, 'critic_loss': 1.8531103090047836, 'actor_loss': -5202.9802734375, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.038185603141784666, 'td_error': 1014.0852931186763, 'value_scale': 1.070690916165747, 'discounted_advantage': -218.84526939247337, 'initial_state': 2.285719394683838, 'diff_eval': 8015.80050293378} step=88000
2025-12-06 15:20.07 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_88000.d3


Epoch 89/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.59it/s, critic_loss=1.81, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:20.49 [info     ] AWAC_20251206141631: epoch=89 step=89000 epoch=89 metrics={'time_sample_batch': 0.01948785424232483, 'time_algorithm_update': 0.0187016921043396, 'critic_loss': 1.8133643524646759, 'actor_loss': -5202.993025390625, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03844537949562073, 'td_error': 997.9984863795215, 'value_scale': 0.907144609680251, 'discounted_advantage': -217.00884859996563, 'initial_state': 2.364924669265747, 'diff_eval': 8015.80050293378} step=89000
2025-12-06 15:20.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_89000.d3


Epoch 90/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.17it/s, critic_loss=1.78, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:21.34 [info     ] AWAC_20251206141631: epoch=90 step=90000 epoch=90 metrics={'time_sample_batch': 0.020444403171539307, 'time_algorithm_update': 0.019957164764404296, 'critic_loss': 1.7845373146533967, 'actor_loss': -5202.996313476562, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.04069296956062317, 'td_error': 1015.5448932547929, 'value_scale': 1.1037577715989637, 'discounted_advantage': -219.26312477406665, 'initial_state': 2.3730087280273438, 'diff_eval': 8015.80050293378} step=90000
2025-12-06 15:21.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_90000.d3


Epoch 91/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.26it/s, critic_loss=1.75, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:22.19 [info     ] AWAC_20251206141631: epoch=91 step=91000 epoch=91 metrics={'time_sample_batch': 0.020416456222534178, 'time_algorithm_update': 0.01977272081375122, 'critic_loss': 1.7509693670272828, 'actor_loss': -5202.999771972656, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.0404945707321167, 'td_error': 996.7386273330086, 'value_scale': 0.8688633298756647, 'discounted_advantage': -217.1582524144218, 'initial_state': 2.0935540199279785, 'diff_eval': 8015.80050293378} step=91000
2025-12-06 15:22.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_91000.d3


Epoch 92/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.70it/s, critic_loss=1.7, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:23.03 [info     ] AWAC_20251206141631: epoch=92 step=92000 epoch=92 metrics={'time_sample_batch': 0.01934349536895752, 'time_algorithm_update': 0.02017400074005127, 'critic_loss': 1.7029869837760925, 'actor_loss': -5203.001427246094, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03979350185394287, 'td_error': 1016.8839264072346, 'value_scale': 1.0457933840637734, 'discounted_advantage': -219.0460314462542, 'initial_state': 2.0662548542022705, 'diff_eval': 8015.80050293378} step=92000
2025-12-06 15:23.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_92000.d3


Epoch 93/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.89it/s, critic_loss=1.69, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:23.47 [info     ] AWAC_20251206141631: epoch=93 step=93000 epoch=93 metrics={'time_sample_batch': 0.0195205237865448, 'time_algorithm_update': 0.01973105788230896, 'critic_loss': 1.6919256014823914, 'actor_loss': -5203.002905273437, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.039529442548751834, 'td_error': 998.7071023767669, 'value_scale': 0.8108863173250175, 'discounted_advantage': -217.43210023091814, 'initial_state': 2.040365695953369, 'diff_eval': 8015.80050293378} step=93000
2025-12-06 15:23.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_93000.d3


Epoch 94/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.39it/s, critic_loss=1.68, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:24.29 [info     ] AWAC_20251206141631: epoch=94 step=94000 epoch=94 metrics={'time_sample_batch': 0.019462148427963257, 'time_algorithm_update': 0.018962162971496582, 'critic_loss': 1.6820283541679382, 'actor_loss': -5203.003349609375, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03870036244392395, 'td_error': 1010.8741483730983, 'value_scale': 0.9643924831063779, 'discounted_advantage': -218.51432594891975, 'initial_state': 2.0056841373443604, 'diff_eval': 8015.80050293378} step=94000
2025-12-06 15:24.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_94000.d3


Epoch 95/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.19it/s, critic_loss=1.69, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:25.13 [info     ] AWAC_20251206141631: epoch=95 step=95000 epoch=95 metrics={'time_sample_batch': 0.01955606484413147, 'time_algorithm_update': 0.019228310108184814, 'critic_loss': 1.6849439373016357, 'actor_loss': -5203.003459472657, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.0390451672077179, 'td_error': 996.298059955404, 'value_scale': 0.7378131456802735, 'discounted_advantage': -217.0571174352519, 'initial_state': 1.9975544214248657, 'diff_eval': 8015.80050293378} step=95000
2025-12-06 15:25.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_95000.d3


Epoch 96/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.15it/s, critic_loss=1.69, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:25.56 [info     ] AWAC_20251206141631: epoch=96 step=96000 epoch=96 metrics={'time_sample_batch': 0.01976662254333496, 'time_algorithm_update': 0.019035221338272096, 'critic_loss': 1.6905706585645677, 'actor_loss': -5203.00324609375, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03905870175361633, 'td_error': 1009.0864917779513, 'value_scale': 0.8183067223652011, 'discounted_advantage': -218.5514834479709, 'initial_state': 2.112947463989258, 'diff_eval': 8015.80050293378} step=96000
2025-12-06 15:25.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_96000.d3


Epoch 97/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.43it/s, critic_loss=1.7, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:26.39 [info     ] AWAC_20251206141631: epoch=97 step=97000 epoch=97 metrics={'time_sample_batch': 0.019387911558151245, 'time_algorithm_update': 0.01898728942871094, 'critic_loss': 1.7007354720830918, 'actor_loss': -5203.00246875, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.038645536184310915, 'td_error': 996.4642985339478, 'value_scale': 0.7700763295099377, 'discounted_advantage': -216.74602560632226, 'initial_state': 2.0285027027130127, 'diff_eval': 8015.80050293378} step=97000
2025-12-06 15:26.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_97000.d3


Epoch 98/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.09it/s, critic_loss=1.7, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:27.22 [info     ] AWAC_20251206141631: epoch=98 step=98000 epoch=98 metrics={'time_sample_batch': 0.01965381741523743, 'time_algorithm_update': 0.019228777170181274, 'critic_loss': 1.695500198483467, 'actor_loss': -5203.004073730469, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03916286063194275, 'td_error': 1018.9954426994597, 'value_scale': 0.9292487033595208, 'discounted_advantage': -219.4612112440685, 'initial_state': 1.8955858945846558, 'diff_eval': 8015.80050293378} step=98000
2025-12-06 15:27.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_98000.d3


Epoch 99/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.31it/s, critic_loss=1.7, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:28.05 [info     ] AWAC_20251206141631: epoch=99 step=99000 epoch=99 metrics={'time_sample_batch': 0.019355469465255737, 'time_algorithm_update': 0.019174298286437987, 'critic_loss': 1.6989360147714614, 'actor_loss': -5203.004914550781, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03880026054382324, 'td_error': 1014.1537270097957, 'value_scale': 1.0320866325368976, 'discounted_advantage': -219.09688323979037, 'initial_state': 2.0371434688568115, 'diff_eval': 8015.80050293378} step=99000
2025-12-06 15:28.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_99000.d3


Epoch 100/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.14it/s, critic_loss=1.68, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:28.48 [info     ] AWAC_20251206141631: epoch=100 step=100000 epoch=100 metrics={'time_sample_batch': 0.019610841274261475, 'time_algorithm_update': 0.019230082988739013, 'critic_loss': 1.6818301075696944, 'actor_loss': -5203.004734375, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.039102259397506715, 'td_error': 990.4921981433997, 'value_scale': 0.5765193138405843, 'discounted_advantage': -215.82052279989037, 'initial_state': 1.5956302881240845, 'diff_eval': 8015.80050293378} step=100000
2025-12-06 15:28.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_100000.d3


Epoch 101/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.72it/s, critic_loss=1.65, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:29.33 [info     ] AWAC_20251206141631: epoch=101 step=101000 epoch=101 metrics={'time_sample_batch': 0.0200047664642334, 'time_algorithm_update': 0.0194671413898468, 'critic_loss': 1.6542722370624543, 'actor_loss': -5203.005025878906, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.039753355026245114, 'td_error': 1020.2618619232996, 'value_scale': 0.9351189815488465, 'discounted_advantage': -219.29868302796442, 'initial_state': 1.76713228225708, 'diff_eval': 8015.80050293378} step=101000
2025-12-06 15:29.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_101000.d3


Epoch 102/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.52it/s, critic_loss=1.65, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:30.18 [info     ] AWAC_20251206141631: epoch=102 step=102000 epoch=102 metrics={'time_sample_batch': 0.020527191638946535, 'time_algorithm_update': 0.019287306785583495, 'critic_loss': 1.647979879140854, 'actor_loss': -5203.005005859375, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.04010394835472107, 'td_error': 1008.5990205441437, 'value_scale': 0.8075528318692414, 'discounted_advantage': -218.47484613024415, 'initial_state': 1.763706922531128, 'diff_eval': 8015.80050293378} step=102000
2025-12-06 15:30.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_102000.d3


Epoch 103/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.55it/s, critic_loss=1.65, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:31.03 [info     ] AWAC_20251206141631: epoch=103 step=103000 epoch=103 metrics={'time_sample_batch': 0.02164838147163391, 'time_algorithm_update': 0.019858657360076904, 'critic_loss': 1.6471692600250245, 'actor_loss': -5203.004854492187, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.041783745050430296, 'td_error': 969.0787522271635, 'value_scale': 0.2626535090600661, 'discounted_advantage': -213.66890195293564, 'initial_state': 0.9620724320411682, 'diff_eval': 8015.80050293378} step=103000
2025-12-06 15:31.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_103000.d3


Epoch 104/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.25it/s, critic_loss=1.65, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:31.47 [info     ] AWAC_20251206141631: epoch=104 step=104000 epoch=104 metrics={'time_sample_batch': 0.019700477123260497, 'time_algorithm_update': 0.01892601203918457, 'critic_loss': 1.6533179918527603, 'actor_loss': -5203.004433105469, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.038909733295440674, 'td_error': 990.9314079637386, 'value_scale': 0.6297402458471517, 'discounted_advantage': -216.0231563386111, 'initial_state': 1.3121353387832642, 'diff_eval': 8015.80050293378} step=104000
2025-12-06 15:31.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_104000.d3


Epoch 105/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.15it/s, critic_loss=1.65, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:32.31 [info     ] AWAC_20251206141631: epoch=105 step=105000 epoch=105 metrics={'time_sample_batch': 0.020978155851364137, 'time_algorithm_update': 0.019449796676635742, 'critic_loss': 1.645509380221367, 'actor_loss': -5203.0048344726565, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.04071618247032165, 'td_error': 1003.8722130688751, 'value_scale': 0.6980623775938556, 'discounted_advantage': -217.5670663629807, 'initial_state': 1.2839508056640625, 'diff_eval': 8015.80050293378} step=105000
2025-12-06 15:32.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_105000.d3


Epoch 106/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.33it/s, critic_loss=1.62, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:33.16 [info     ] AWAC_20251206141631: epoch=106 step=106000 epoch=106 metrics={'time_sample_batch': 0.02028975510597229, 'time_algorithm_update': 0.01981741738319397, 'critic_loss': 1.624832238316536, 'actor_loss': -5203.004883300781, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.040402873039245606, 'td_error': 1011.8348878548359, 'value_scale': 1.029804255069395, 'discounted_advantage': -218.16613313122693, 'initial_state': 1.5015616416931152, 'diff_eval': 8015.80050293378} step=106000
2025-12-06 15:33.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_106000.d3


Epoch 107/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.39it/s, critic_loss=1.63, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:33.59 [info     ] AWAC_20251206141631: epoch=107 step=107000 epoch=107 metrics={'time_sample_batch': 0.019409212827682494, 'time_algorithm_update': 0.01905537176132202, 'critic_loss': 1.6277722918987274, 'actor_loss': -5203.00439453125, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03873286271095276, 'td_error': 999.2652056438176, 'value_scale': 0.7055813917489715, 'discounted_advantage': -217.15808025752625, 'initial_state': 1.1777012348175049, 'diff_eval': 8015.80050293378} step=107000
2025-12-06 15:33.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_107000.d3


Epoch 108/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.31it/s, critic_loss=1.62, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:34.42 [info     ] AWAC_20251206141631: epoch=108 step=108000 epoch=108 metrics={'time_sample_batch': 0.019522429943084715, 'time_algorithm_update': 0.019041378498077393, 'critic_loss': 1.6244939926862716, 'actor_loss': -5203.004685058594, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03883637690544128, 'td_error': 1011.0986031145064, 'value_scale': 0.8658909978921161, 'discounted_advantage': -218.62552480380467, 'initial_state': 1.2646788358688354, 'diff_eval': 8015.80050293378} step=108000
2025-12-06 15:34.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_108000.d3


Epoch 109/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.04it/s, critic_loss=1.62, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:35.25 [info     ] AWAC_20251206141631: epoch=109 step=109000 epoch=109 metrics={'time_sample_batch': 0.019855772018432617, 'time_algorithm_update': 0.019118255853652955, 'critic_loss': 1.6211608225107192, 'actor_loss': -5203.004169921875, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03924528574943543, 'td_error': 982.8546554042999, 'value_scale': 0.6022730340489498, 'discounted_advantage': -215.14589757004745, 'initial_state': 0.6832947731018066, 'diff_eval': 8015.80050293378} step=109000
2025-12-06 15:35.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_109000.d3


Epoch 110/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.06it/s, critic_loss=1.6, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:36.08 [info     ] AWAC_20251206141631: epoch=110 step=110000 epoch=110 metrics={'time_sample_batch': 0.019899945497512818, 'time_algorithm_update': 0.019048591375350952, 'critic_loss': 1.6022557682991028, 'actor_loss': -5203.004084472656, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.039217530012130734, 'td_error': 1004.9158662726313, 'value_scale': 0.7333441003110988, 'discounted_advantage': -217.57564639222903, 'initial_state': 0.6814270615577698, 'diff_eval': 8015.80050293378} step=110000
2025-12-06 15:36.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_110000.d3


Epoch 111/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.59it/s, critic_loss=1.61, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:36.52 [info     ] AWAC_20251206141631: epoch=111 step=111000 epoch=111 metrics={'time_sample_batch': 0.020165382862091064, 'time_algorithm_update': 0.01952774453163147, 'critic_loss': 1.6127460877895354, 'actor_loss': -5203.004194335937, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03996841430664062, 'td_error': 1016.0273087590571, 'value_scale': 0.9208860015785454, 'discounted_advantage': -218.62055703751167, 'initial_state': 1.0348212718963623, 'diff_eval': 8015.80050293378} step=111000
2025-12-06 15:36.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_111000.d3


Epoch 112/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.23it/s, critic_loss=1.61, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:37.35 [info     ] AWAC_20251206141631: epoch=112 step=112000 epoch=112 metrics={'time_sample_batch': 0.019520641088485717, 'time_algorithm_update': 0.01914338707923889, 'critic_loss': 1.604973268508911, 'actor_loss': -5203.003845214844, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.038942406177520754, 'td_error': 1001.0556775093729, 'value_scale': 0.6931017078037709, 'discounted_advantage': -217.37891247390445, 'initial_state': 0.32325562834739685, 'diff_eval': 8015.80050293378} step=112000
2025-12-06 15:37.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_112000.d3


Epoch 113/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.67it/s, critic_loss=1.61, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:38.19 [info     ] AWAC_20251206141631: epoch=113 step=113000 epoch=113 metrics={'time_sample_batch': 0.02014550495147705, 'time_algorithm_update': 0.01940405535697937, 'critic_loss': 1.6049306625127793, 'actor_loss': -5203.0032944335935, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03984355878829956, 'td_error': 989.8430532227121, 'value_scale': 0.5022907380287107, 'discounted_advantage': -215.81398301022367, 'initial_state': -0.012973994947969913, 'diff_eval': 8015.80050293378} step=113000
2025-12-06 15:38.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_113000.d3


Epoch 114/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.96it/s, critic_loss=1.61, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:39.04 [info     ] AWAC_20251206141631: epoch=114 step=114000 epoch=114 metrics={'time_sample_batch': 0.02094820213317871, 'time_algorithm_update': 0.01975585389137268, 'critic_loss': 1.6138571882247925, 'actor_loss': -5203.003001953125, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.040999812364578246, 'td_error': 1008.2177556180379, 'value_scale': 0.6244086369291285, 'discounted_advantage': -217.69763915908447, 'initial_state': -0.05467937886714935, 'diff_eval': 8015.80050293378} step=114000
2025-12-06 15:39.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_114000.d3


Epoch 115/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.66it/s, critic_loss=1.61, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:39.49 [info     ] AWAC_20251206141631: epoch=115 step=115000 epoch=115 metrics={'time_sample_batch': 0.020005271434783936, 'time_algorithm_update': 0.01962013053894043, 'critic_loss': 1.6134264649152756, 'actor_loss': -5203.003049804687, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03988548731803894, 'td_error': 1001.3194470887704, 'value_scale': 0.5239611175410881, 'discounted_advantage': -217.31502096877946, 'initial_state': -0.36357125639915466, 'diff_eval': 8015.80050293378} step=115000
2025-12-06 15:39.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_115000.d3


Epoch 116/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.22it/s, critic_loss=1.63, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:40.33 [info     ] AWAC_20251206141631: epoch=116 step=116000 epoch=116 metrics={'time_sample_batch': 0.019574060916900636, 'time_algorithm_update': 0.019162416696548463, 'critic_loss': 1.6323120294809341, 'actor_loss': -5203.002052246094, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03899746799468994, 'td_error': 1014.2693928874552, 'value_scale': 0.547145641160612, 'discounted_advantage': -218.44797306948172, 'initial_state': -0.631112813949585, 'diff_eval': 8015.80050293378} step=116000
2025-12-06 15:40.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_116000.d3


Epoch 117/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.92it/s, critic_loss=1.64, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:41.16 [info     ] AWAC_20251206141631: epoch=117 step=117000 epoch=117 metrics={'time_sample_batch': 0.019356720447540283, 'time_algorithm_update': 0.01982123112678528, 'critic_loss': 1.6415899724960328, 'actor_loss': -5203.002304199219, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03944509053230286, 'td_error': 975.1390260455073, 'value_scale': 0.16154972275122625, 'discounted_advantage': -214.2213104991076, 'initial_state': -0.9823721051216125, 'diff_eval': 8015.80050293378} step=117000
2025-12-06 15:41.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_117000.d3


Epoch 118/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.95it/s, critic_loss=1.63, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:42.00 [info     ] AWAC_20251206141631: epoch=118 step=118000 epoch=118 metrics={'time_sample_batch': 0.02009791159629822, 'time_algorithm_update': 0.01898351263999939, 'critic_loss': 1.630110200047493, 'actor_loss': -5203.00215234375, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03935862636566162, 'td_error': 986.9579496617197, 'value_scale': 0.38595998851849395, 'discounted_advantage': -215.81142787405966, 'initial_state': -0.74928879737854, 'diff_eval': 8015.80050293378} step=118000
2025-12-06 15:42.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_118000.d3


Epoch 119/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.40it/s, critic_loss=1.62, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:42.43 [info     ] AWAC_20251206141631: epoch=119 step=119000 epoch=119 metrics={'time_sample_batch': 0.019563632726669312, 'time_algorithm_update': 0.018874675989151, 'critic_loss': 1.6224414639472962, 'actor_loss': -5203.00168359375, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03872080945968628, 'td_error': 984.4170035885674, 'value_scale': 0.3533969902185987, 'discounted_advantage': -214.99139684172164, 'initial_state': -0.9805827140808105, 'diff_eval': 8015.80050293378} step=119000
2025-12-06 15:42.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_119000.d3


Epoch 120/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.12it/s, critic_loss=1.63, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:43.26 [info     ] AWAC_20251206141631: epoch=120 step=120000 epoch=120 metrics={'time_sample_batch': 0.019641674518585204, 'time_algorithm_update': 0.019202770709991455, 'critic_loss': 1.6327310206890107, 'actor_loss': -5203.001900878906, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.039119221687316894, 'td_error': 1004.5353192824787, 'value_scale': 0.47334752762916577, 'discounted_advantage': -217.39473903535196, 'initial_state': -0.9086463451385498, 'diff_eval': 8015.80050293378} step=120000
2025-12-06 15:43.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_120000.d3


Epoch 121/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.13it/s, critic_loss=1.64, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:44.09 [info     ] AWAC_20251206141631: epoch=121 step=121000 epoch=121 metrics={'time_sample_batch': 0.01973468780517578, 'time_algorithm_update': 0.019093993425369264, 'critic_loss': 1.6367743279933928, 'actor_loss': -5203.001356933593, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03910898303985596, 'td_error': 997.9566235660606, 'value_scale': 0.5303787617410993, 'discounted_advantage': -216.58084457103152, 'initial_state': -0.8424988389015198, 'diff_eval': 8015.80050293378} step=121000
2025-12-06 15:44.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_121000.d3


Epoch 122/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.81it/s, critic_loss=1.63, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:44.53 [info     ] AWAC_20251206141631: epoch=122 step=122000 epoch=122 metrics={'time_sample_batch': 0.020173346281051635, 'time_algorithm_update': 0.019204431772232057, 'critic_loss': 1.6334388840198517, 'actor_loss': -5203.001456054688, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.039657004594802855, 'td_error': 1041.9291271230397, 'value_scale': 0.9829556311903036, 'discounted_advantage': -221.16679922361425, 'initial_state': -0.37589791417121887, 'diff_eval': 8015.80050293378} step=122000
2025-12-06 15:44.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_122000.d3


Epoch 123/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.27it/s, critic_loss=1.64, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:45.36 [info     ] AWAC_20251206141631: epoch=123 step=123000 epoch=123 metrics={'time_sample_batch': 0.01951529884338379, 'time_algorithm_update': 0.01912643647193909, 'critic_loss': 1.6415266205072403, 'actor_loss': -5203.0000390625, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03891339635848999, 'td_error': 1000.5356272667328, 'value_scale': 0.447551869699452, 'discounted_advantage': -217.2454798536867, 'initial_state': -1.0557783842086792, 'diff_eval': 8015.80050293378} step=123000
2025-12-06 15:45.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_123000.d3


Epoch 124/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.16it/s, critic_loss=1.64, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:46.20 [info     ] AWAC_20251206141631: epoch=124 step=124000 epoch=124 metrics={'time_sample_batch': 0.019681188106536865, 'time_algorithm_update': 0.01909406900405884, 'critic_loss': 1.639287201166153, 'actor_loss': -5202.999135253906, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.039048938035964965, 'td_error': 1009.0387746453926, 'value_scale': 0.7088160489542192, 'discounted_advantage': -217.9234543813686, 'initial_state': -0.757378101348877, 'diff_eval': 8015.80050293378} step=124000
2025-12-06 15:46.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_124000.d3


Epoch 125/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.19it/s, critic_loss=1.64, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:47.03 [info     ] AWAC_20251206141631: epoch=125 step=125000 epoch=125 metrics={'time_sample_batch': 0.019665283918380738, 'time_algorithm_update': 0.01908428502082825, 'critic_loss': 1.6396720079183578, 'actor_loss': -5202.999619140625, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.039021926164627074, 'td_error': 1007.9927084112148, 'value_scale': 0.5292384768207772, 'discounted_advantage': -217.72432120349274, 'initial_state': -1.128722071647644, 'diff_eval': 8015.80050293378} step=125000
2025-12-06 15:47.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_125000.d3


Epoch 126/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.74it/s, critic_loss=1.64, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:47.48 [info     ] AWAC_20251206141631: epoch=126 step=126000 epoch=126 metrics={'time_sample_batch': 0.020799181699752808, 'time_algorithm_update': 0.02029300332069397, 'critic_loss': 1.6406400913000108, 'actor_loss': -5203.000263183594, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.041395920276641844, 'td_error': 1015.6784511044083, 'value_scale': 0.6818754627975914, 'discounted_advantage': -218.72233520154973, 'initial_state': -0.9395598769187927, 'diff_eval': 8015.80050293378} step=126000
2025-12-06 15:47.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_126000.d3


Epoch 127/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.23it/s, critic_loss=1.63, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:48.33 [info     ] AWAC_20251206141631: epoch=127 step=127000 epoch=127 metrics={'time_sample_batch': 0.02080637741088867, 'time_algorithm_update': 0.019444066286087035, 'critic_loss': 1.6299741094112397, 'actor_loss': -5202.999431640625, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.04054287648200989, 'td_error': 1023.676348928147, 'value_scale': 0.8947758359907934, 'discounted_advantage': -219.41003522792266, 'initial_state': -0.6008689403533936, 'diff_eval': 8015.80050293378} step=127000
2025-12-06 15:48.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_127000.d3


Epoch 128/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.80it/s, critic_loss=1.62, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:49.18 [info     ] AWAC_20251206141631: epoch=128 step=128000 epoch=128 metrics={'time_sample_batch': 0.020213794708251954, 'time_algorithm_update': 0.01913546872138977, 'critic_loss': 1.623503063440323, 'actor_loss': -5203.000199707031, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03963513779640198, 'td_error': 965.4431635094851, 'value_scale': 0.13900594474522948, 'discounted_advantage': -213.158994577344, 'initial_state': -1.2126247882843018, 'diff_eval': 8015.80050293378} step=128000
2025-12-06 15:49.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_128000.d3


Epoch 129/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.81it/s, critic_loss=1.61, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:50.04 [info     ] AWAC_20251206141631: epoch=129 step=129000 epoch=129 metrics={'time_sample_batch': 0.02055655288696289, 'time_algorithm_update': 0.020473003625869752, 'critic_loss': 1.6148816919326783, 'actor_loss': -5202.998708007813, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.04130378198623657, 'td_error': 1001.7181502865488, 'value_scale': 0.5471937551447296, 'discounted_advantage': -217.31431494946702, 'initial_state': -0.7822145819664001, 'diff_eval': 8015.80050293378} step=129000
2025-12-06 15:50.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_129000.d3


Epoch 130/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.63it/s, critic_loss=1.61, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:50.48 [info     ] AWAC_20251206141631: epoch=130 step=130000 epoch=130 metrics={'time_sample_batch': 0.020312654733657835, 'time_algorithm_update': 0.019322554111480712, 'critic_loss': 1.6114130026102067, 'actor_loss': -5202.9991640625, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03991855025291443, 'td_error': 984.4930351151146, 'value_scale': 0.14406488517772373, 'discounted_advantage': -215.14285028533067, 'initial_state': -1.290840744972229, 'diff_eval': 8015.80050293378} step=130000
2025-12-06 15:50.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_130000.d3


Epoch 131/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.12it/s, critic_loss=1.61, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:51.31 [info     ] AWAC_20251206141631: epoch=131 step=131000 epoch=131 metrics={'time_sample_batch': 0.019631828784942625, 'time_algorithm_update': 0.019185134649276733, 'critic_loss': 1.6151506862640381, 'actor_loss': -5202.998747558594, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.039097680568695066, 'td_error': 994.2538894229004, 'value_scale': 0.4566098776991789, 'discounted_advantage': -216.58240401652716, 'initial_state': -0.970106840133667, 'diff_eval': 8015.80050293378} step=131000
2025-12-06 15:51.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_131000.d3


Epoch 132/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.17it/s, critic_loss=1.61, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:52.14 [info     ] AWAC_20251206141631: epoch=132 step=132000 epoch=132 metrics={'time_sample_batch': 0.01963284969329834, 'time_algorithm_update': 0.01912348508834839, 'critic_loss': 1.6099757549762725, 'actor_loss': -5202.998836914063, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03904721570014954, 'td_error': 997.3427534071119, 'value_scale': 0.44842848685130554, 'discounted_advantage': -216.77119960858207, 'initial_state': -1.0010796785354614, 'diff_eval': 8015.80050293378} step=132000
2025-12-06 15:52.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_132000.d3


Epoch 133/200: 100%|██████████| 1000/1000 [00:40<00:00, 25.00it/s, critic_loss=1.61, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:52.58 [info     ] AWAC_20251206141631: epoch=133 step=133000 epoch=133 metrics={'time_sample_batch': 0.019772418022155763, 'time_algorithm_update': 0.01924687886238098, 'critic_loss': 1.6095963469743728, 'actor_loss': -5202.995537109375, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.0393021342754364, 'td_error': 1005.6815991958307, 'value_scale': 0.650427883130257, 'discounted_advantage': -217.56637963983087, 'initial_state': -0.49136075377464294, 'diff_eval': 8015.80050293378} step=133000
2025-12-06 15:52.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_133000.d3


Epoch 134/200: 100%|██████████| 1000/1000 [00:38<00:00, 25.79it/s, critic_loss=1.61, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:53.40 [info     ] AWAC_20251206141631: epoch=134 step=134000 epoch=134 metrics={'time_sample_batch': 0.01916234374046326, 'time_algorithm_update': 0.01866536235809326, 'critic_loss': 1.60807564163208, 'actor_loss': -5202.997825683594, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03810978364944458, 'td_error': 999.189185131374, 'value_scale': 0.43854804309584633, 'discounted_advantage': -216.71737218824703, 'initial_state': -1.0014318227767944, 'diff_eval': 8015.80050293378} step=134000
2025-12-06 15:53.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_134000.d3


Epoch 135/200: 100%|██████████| 1000/1000 [00:38<00:00, 25.69it/s, critic_loss=1.6, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:54.22 [info     ] AWAC_20251206141631: epoch=135 step=135000 epoch=135 metrics={'time_sample_batch': 0.019271085262298582, 'time_algorithm_update': 0.018714012145996092, 'critic_loss': 1.6045466014146805, 'actor_loss': -5202.995320800781, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.038252013921737674, 'td_error': 1016.3659252397528, 'value_scale': 0.6971974108315073, 'discounted_advantage': -218.60289153567825, 'initial_state': -0.9717311263084412, 'diff_eval': 8015.80050293378} step=135000
2025-12-06 15:54.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_135000.d3


Epoch 136/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.43it/s, critic_loss=1.6, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:55.05 [info     ] AWAC_20251206141631: epoch=136 step=136000 epoch=136 metrics={'time_sample_batch': 0.019309019088745116, 'time_algorithm_update': 0.019018423318862915, 'critic_loss': 1.6005363725423813, 'actor_loss': -5202.996862304687, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.038611016273498534, 'td_error': 1022.3979852973258, 'value_scale': 0.8798045884109003, 'discounted_advantage': -219.53678760573897, 'initial_state': -0.44461262226104736, 'diff_eval': 8015.80050293378} step=136000
2025-12-06 15:55.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_136000.d3


Epoch 137/200: 100%|██████████| 1000/1000 [00:38<00:00, 25.76it/s, critic_loss=1.6, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:55.47 [info     ] AWAC_20251206141631: epoch=137 step=137000 epoch=137 metrics={'time_sample_batch': 0.01924815845489502, 'time_algorithm_update': 0.01862027621269226, 'critic_loss': 1.600691139101982, 'actor_loss': -5202.992861328125, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.0381360011100769, 'td_error': 992.0676314492795, 'value_scale': 0.403401982296049, 'discounted_advantage': -216.07854985754668, 'initial_state': -1.051661491394043, 'diff_eval': 8015.80050293378} step=137000
2025-12-06 15:55.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_137000.d3


Epoch 138/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.37it/s, critic_loss=1.59, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:56.30 [info     ] AWAC_20251206141631: epoch=138 step=138000 epoch=138 metrics={'time_sample_batch': 0.019614126205444334, 'time_algorithm_update': 0.01885872030258179, 'critic_loss': 1.5892359310388564, 'actor_loss': -5202.992976074219, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03875300931930542, 'td_error': 988.5321126403451, 'value_scale': 0.3363870005244319, 'discounted_advantage': -215.60120849616993, 'initial_state': -1.109049916267395, 'diff_eval': 8015.80050293378} step=138000
2025-12-06 15:56.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_138000.d3


Epoch 139/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.83it/s, critic_loss=1.6, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:57.16 [info     ] AWAC_20251206141631: epoch=139 step=139000 epoch=139 metrics={'time_sample_batch': 0.020741288661956788, 'time_algorithm_update': 0.020223905324935912, 'critic_loss': 1.5996937329769134, 'actor_loss': -5202.994631835937, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.04125834321975708, 'td_error': 1026.1416071236324, 'value_scale': 0.7539105682594004, 'discounted_advantage': -219.95102912897408, 'initial_state': -0.4048628509044647, 'diff_eval': 8015.80050293378} step=139000
2025-12-06 15:57.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_139000.d3


Epoch 140/200: 100%|██████████| 1000/1000 [00:38<00:00, 25.77it/s, critic_loss=1.59, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:57.58 [info     ] AWAC_20251206141631: epoch=140 step=140000 epoch=140 metrics={'time_sample_batch': 0.019268742084503174, 'time_algorithm_update': 0.01854470753669739, 'critic_loss': 1.5880444955825805, 'actor_loss': -5202.995011230469, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.0380883960723877, 'td_error': 1019.4308717931025, 'value_scale': 0.715996147836781, 'discounted_advantage': -218.8391465266154, 'initial_state': -0.47709882259368896, 'diff_eval': 8015.80050293378} step=140000
2025-12-06 15:57.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_140000.d3


Epoch 141/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.99it/s, critic_loss=1.57, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:58.41 [info     ] AWAC_20251206141631: epoch=141 step=141000 epoch=141 metrics={'time_sample_batch': 0.01993974208831787, 'time_algorithm_update': 0.019021339178085326, 'critic_loss': 1.5724190447330475, 'actor_loss': -5202.995950195313, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.039266627550125124, 'td_error': 1022.0650497178692, 'value_scale': 0.73614805845945, 'discounted_advantage': -219.6462544671628, 'initial_state': -0.6297025084495544, 'diff_eval': 8015.80050293378} step=141000
2025-12-06 15:58.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_141000.d3


Epoch 142/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.86it/s, critic_loss=1.56, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 15:59.25 [info     ] AWAC_20251206141631: epoch=142 step=142000 epoch=142 metrics={'time_sample_batch': 0.020060643911361694, 'time_algorithm_update': 0.01916578483581543, 'critic_loss': 1.5635463215112686, 'actor_loss': -5202.9926997070315, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03951383709907532, 'td_error': 1013.6190745532782, 'value_scale': 0.6503016219731211, 'discounted_advantage': -218.22059276877678, 'initial_state': -0.5031588077545166, 'diff_eval': 8015.80050293378} step=142000
2025-12-06 15:59.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_142000.d3


Epoch 143/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.46it/s, critic_loss=1.55, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:00.09 [info     ] AWAC_20251206141631: epoch=143 step=143000 epoch=143 metrics={'time_sample_batch': 0.020359392642974854, 'time_algorithm_update': 0.019545579671859742, 'critic_loss': 1.5523095024824143, 'actor_loss': -5202.993666992187, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.040175211429595944, 'td_error': 969.7600345547031, 'value_scale': 0.07610950473936436, 'discounted_advantage': -213.6584904270469, 'initial_state': -1.2057968378067017, 'diff_eval': 8015.80050293378} step=143000
2025-12-06 16:00.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_143000.d3


Epoch 144/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.59it/s, critic_loss=1.56, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:00.54 [info     ] AWAC_20251206141631: epoch=144 step=144000 epoch=144 metrics={'time_sample_batch': 0.020294484853744507, 'time_algorithm_update': 0.019355918645858766, 'critic_loss': 1.5642218947410584, 'actor_loss': -5202.99442578125, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03994084119796753, 'td_error': 1003.6973738381737, 'value_scale': 0.4754731573776497, 'discounted_advantage': -217.4619876106996, 'initial_state': -0.8365963101387024, 'diff_eval': 8015.80050293378} step=144000
2025-12-06 16:00.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_144000.d3


Epoch 145/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.23it/s, critic_loss=1.57, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:01.37 [info     ] AWAC_20251206141631: epoch=145 step=145000 epoch=145 metrics={'time_sample_batch': 0.019634756565093996, 'time_algorithm_update': 0.019041568517684935, 'critic_loss': 1.5692674870491028, 'actor_loss': -5202.986180175782, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.038952868461608885, 'td_error': 999.5943333369456, 'value_scale': 0.4128036187338734, 'discounted_advantage': -216.99027480285048, 'initial_state': -0.8904432654380798, 'diff_eval': 8015.80050293378} step=145000
2025-12-06 16:01.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_145000.d3


Epoch 146/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.95it/s, critic_loss=1.57, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:02.20 [info     ] AWAC_20251206141631: epoch=146 step=146000 epoch=146 metrics={'time_sample_batch': 0.020044389486312868, 'time_algorithm_update': 0.019115912437438966, 'critic_loss': 1.5650945979356765, 'actor_loss': -5202.989833984375, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.039424456119537356, 'td_error': 1006.9667812857981, 'value_scale': 0.6538865499125055, 'discounted_advantage': -217.88421825953114, 'initial_state': -0.234153613448143, 'diff_eval': 8015.80050293378} step=146000
2025-12-06 16:02.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_146000.d3


Epoch 147/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.36it/s, critic_loss=1.56, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:03.03 [info     ] AWAC_20251206141631: epoch=147 step=147000 epoch=147 metrics={'time_sample_batch': 0.019482765913009643, 'time_algorithm_update': 0.019018627882003786, 'critic_loss': 1.5546709126234055, 'actor_loss': -5202.991014648437, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03877877712249756, 'td_error': 1016.9296493600013, 'value_scale': 0.8551441156406858, 'discounted_advantage': -218.85669427841825, 'initial_state': 0.0020827483385801315, 'diff_eval': 8015.80050293378} step=147000
2025-12-06 16:03.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_147000.d3


Epoch 148/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.97it/s, critic_loss=1.56, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:03.47 [info     ] AWAC_20251206141631: epoch=148 step=148000 epoch=148 metrics={'time_sample_batch': 0.019820729494094847, 'time_algorithm_update': 0.019297330617904663, 'critic_loss': 1.5593732731342316, 'actor_loss': -5202.986957519532, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03938833212852478, 'td_error': 996.4006687293801, 'value_scale': 0.5384588102659418, 'discounted_advantage': -216.83450099706454, 'initial_state': -0.4048411548137665, 'diff_eval': 8015.80050293378} step=148000
2025-12-06 16:03.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_148000.d3


Epoch 149/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.23it/s, critic_loss=1.56, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:04.30 [info     ] AWAC_20251206141631: epoch=149 step=149000 epoch=149 metrics={'time_sample_batch': 0.01961609959602356, 'time_algorithm_update': 0.01904635453224182, 'critic_loss': 1.5569972232580185, 'actor_loss': -5202.983438964844, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03895221614837646, 'td_error': 1009.7938459044366, 'value_scale': 0.7500722918168417, 'discounted_advantage': -218.03817692506595, 'initial_state': -0.1541004329919815, 'diff_eval': 8015.80050293378} step=149000
2025-12-06 16:04.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_149000.d3


Epoch 150/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.30it/s, critic_loss=1.55, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:05.13 [info     ] AWAC_20251206141631: epoch=150 step=150000 epoch=150 metrics={'time_sample_batch': 0.019658201456069946, 'time_algorithm_update': 0.018931337118148803, 'critic_loss': 1.5516578059196473, 'actor_loss': -5202.9787026367185, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03885907363891602, 'td_error': 1000.8943051457121, 'value_scale': 0.5326928216277029, 'discounted_advantage': -217.23617816412073, 'initial_state': -0.13205325603485107, 'diff_eval': 8015.80050293378} step=150000
2025-12-06 16:05.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_150000.d3


Epoch 151/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.40it/s, critic_loss=1.54, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:05.56 [info     ] AWAC_20251206141631: epoch=151 step=151000 epoch=151 metrics={'time_sample_batch': 0.0194605815410614, 'time_algorithm_update': 0.018963526010513307, 'critic_loss': 1.5417890696525574, 'actor_loss': -5202.983849609375, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03870740985870361, 'td_error': 998.662155331, 'value_scale': 0.5174902690939874, 'discounted_advantage': -216.82739363953144, 'initial_state': 0.06624267250299454, 'diff_eval': 8015.80050293378} step=151000
2025-12-06 16:05.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_151000.d3


Epoch 152/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.20it/s, critic_loss=1.53, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:06.39 [info     ] AWAC_20251206141631: epoch=152 step=152000 epoch=152 metrics={'time_sample_batch': 0.019607627868652344, 'time_algorithm_update': 0.0191067214012146, 'critic_loss': 1.5279014197587968, 'actor_loss': -5202.982966308594, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.038988165855407715, 'td_error': 978.2708048098908, 'value_scale': 0.3007103024992845, 'discounted_advantage': -215.02085664484434, 'initial_state': -0.039364445954561234, 'diff_eval': 8015.80050293378} step=152000
2025-12-06 16:06.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_152000.d3


Epoch 153/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.10it/s, critic_loss=1.51, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:07.24 [info     ] AWAC_20251206141631: epoch=153 step=153000 epoch=153 metrics={'time_sample_batch': 0.019791821718215943, 'time_algorithm_update': 0.02074177050590515, 'critic_loss': 1.513278311252594, 'actor_loss': -5202.985591308594, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.04080641627311707, 'td_error': 993.5834992729933, 'value_scale': 0.617980067611946, 'discounted_advantage': -216.3141279432713, 'initial_state': 0.7235969305038452, 'diff_eval': 8015.80050293378} step=153000
2025-12-06 16:07.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_153000.d3


Epoch 154/200: 100%|██████████| 1000/1000 [00:37<00:00, 26.64it/s, critic_loss=1.48, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:08.05 [info     ] AWAC_20251206141631: epoch=154 step=154000 epoch=154 metrics={'time_sample_batch': 0.018482118368148805, 'time_algorithm_update': 0.01809684991836548, 'critic_loss': 1.4743715521097183, 'actor_loss': -5202.991455566406, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03685646104812622, 'td_error': 998.54113103662, 'value_scale': 0.5809866680653589, 'discounted_advantage': -217.1037139023243, 'initial_state': 1.1049656867980957, 'diff_eval': 8015.80050293378} step=154000
2025-12-06 16:08.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_154000.d3


Epoch 155/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.47it/s, critic_loss=1.46, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:08.48 [info     ] AWAC_20251206141631: epoch=155 step=155000 epoch=155 metrics={'time_sample_batch': 0.019359183549880983, 'time_algorithm_update': 0.018862585306167603, 'critic_loss': 1.4618141627907753, 'actor_loss': -5202.991027832031, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.038506553173065186, 'td_error': 1011.2053015172473, 'value_scale': 0.8138653906819482, 'discounted_advantage': -218.2342653420578, 'initial_state': 1.197918176651001, 'diff_eval': 8015.80050293378} step=155000
2025-12-06 16:08.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_155000.d3


Epoch 156/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.20it/s, critic_loss=1.46, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:09.31 [info     ] AWAC_20251206141631: epoch=156 step=156000 epoch=156 metrics={'time_sample_batch': 0.0197317156791687, 'time_algorithm_update': 0.019014546394348145, 'critic_loss': 1.460918453335762, 'actor_loss': -5202.9906743164065, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.0390191707611084, 'td_error': 1019.8400197876229, 'value_scale': 1.0202150691096878, 'discounted_advantage': -219.20178289822326, 'initial_state': 1.788590431213379, 'diff_eval': 8015.80050293378} step=156000
2025-12-06 16:09.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_156000.d3


Epoch 157/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.56it/s, critic_loss=1.46, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:10.14 [info     ] AWAC_20251206141631: epoch=157 step=157000 epoch=157 metrics={'time_sample_batch': 0.019446067810058594, 'time_algorithm_update': 0.018749823331832886, 'critic_loss': 1.4605887418985366, 'actor_loss': -5202.980489746094, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03846362447738647, 'td_error': 999.4595613664095, 'value_scale': 0.6268191827493911, 'discounted_advantage': -217.07754394158624, 'initial_state': 1.1782373189926147, 'diff_eval': 8015.80050293378} step=157000
2025-12-06 16:10.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_157000.d3


Epoch 158/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.21it/s, critic_loss=1.45, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:10.57 [info     ] AWAC_20251206141631: epoch=158 step=158000 epoch=158 metrics={'time_sample_batch': 0.019737711191177367, 'time_algorithm_update': 0.01897525453567505, 'critic_loss': 1.4472644802927972, 'actor_loss': -5202.970791015625, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03897538542747497, 'td_error': 1019.889156922463, 'value_scale': 0.8454002159544597, 'discounted_advantage': -219.15668844393093, 'initial_state': 1.1898986101150513, 'diff_eval': 8015.80050293378} step=158000
2025-12-06 16:10.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_158000.d3


Epoch 159/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.41it/s, critic_loss=1.44, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:11.40 [info     ] AWAC_20251206141631: epoch=159 step=159000 epoch=159 metrics={'time_sample_batch': 0.019506704568862915, 'time_algorithm_update': 0.018876928329467774, 'critic_loss': 1.4431284908056259, 'actor_loss': -5202.972303710938, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03866143202781677, 'td_error': 1013.7284792967895, 'value_scale': 0.8835998929932414, 'discounted_advantage': -218.61773737625643, 'initial_state': 1.2672866582870483, 'diff_eval': 8015.80050293378} step=159000
2025-12-06 16:11.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_159000.d3


Epoch 160/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.39it/s, critic_loss=1.44, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:12.23 [info     ] AWAC_20251206141631: epoch=160 step=160000 epoch=160 metrics={'time_sample_batch': 0.019544774055480958, 'time_algorithm_update': 0.018889104127883912, 'critic_loss': 1.439260887503624, 'actor_loss': -5202.964411621094, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03870263075828552, 'td_error': 982.8813026388336, 'value_scale': 0.2962570787775927, 'discounted_advantage': -215.12063098207375, 'initial_state': 0.47840678691864014, 'diff_eval': 8015.80050293378} step=160000
2025-12-06 16:12.23 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_160000.d3


Epoch 161/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.89it/s, critic_loss=1.44, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:13.06 [info     ] AWAC_20251206141631: epoch=161 step=161000 epoch=161 metrics={'time_sample_batch': 0.01984923243522644, 'time_algorithm_update': 0.01934047555923462, 'critic_loss': 1.4414681558012963, 'actor_loss': -5202.962033691406, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.0394772264957428, 'td_error': 1019.7807634072722, 'value_scale': 0.8582419405323637, 'discounted_advantage': -219.35192950286418, 'initial_state': 0.9268100261688232, 'diff_eval': 8015.80050293378} step=161000
2025-12-06 16:13.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_161000.d3


Epoch 162/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.93it/s, critic_loss=1.44, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:13.50 [info     ] AWAC_20251206141631: epoch=162 step=162000 epoch=162 metrics={'time_sample_batch': 0.019875784873962403, 'time_algorithm_update': 0.019265591382980347, 'critic_loss': 1.4437852600812913, 'actor_loss': -5202.9467607421875, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.0394113290309906, 'td_error': 1012.8763721212243, 'value_scale': 0.689294139066928, 'discounted_advantage': -218.46256850785414, 'initial_state': 0.6736522912979126, 'diff_eval': 8015.80050293378} step=162000
2025-12-06 16:13.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_162000.d3


Epoch 163/200: 100%|██████████| 1000/1000 [00:38<00:00, 26.07it/s, critic_loss=1.44, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:14.32 [info     ] AWAC_20251206141631: epoch=163 step=163000 epoch=163 metrics={'time_sample_batch': 0.01883270764350891, 'time_algorithm_update': 0.018578428506851197, 'critic_loss': 1.437030061483383, 'actor_loss': -5202.938946289062, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.037692042589187624, 'td_error': 1016.9053338269964, 'value_scale': 0.720813295933553, 'discounted_advantage': -219.02124190598286, 'initial_state': 0.5573710799217224, 'diff_eval': 8015.80050293378} step=163000
2025-12-06 16:14.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_163000.d3


Epoch 164/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.58it/s, critic_loss=1.44, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:15.17 [info     ] AWAC_20251206141631: epoch=164 step=164000 epoch=164 metrics={'time_sample_batch': 0.019802336931228636, 'time_algorithm_update': 0.020694939374923706, 'critic_loss': 1.436263767719269, 'actor_loss': -5202.9162416992185, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.04171907019615173, 'td_error': 1004.3002842254991, 'value_scale': 0.6206757927462553, 'discounted_advantage': -217.34656448506576, 'initial_state': 0.5172567963600159, 'diff_eval': 8015.80050293378} step=164000
2025-12-06 16:15.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_164000.d3


Epoch 165/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.78it/s, critic_loss=1.43, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:16.01 [info     ] AWAC_20251206141631: epoch=165 step=165000 epoch=165 metrics={'time_sample_batch': 0.01979819679260254, 'time_algorithm_update': 0.01960936498641968, 'critic_loss': 1.4286826477646828, 'actor_loss': -5202.9276796875, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.039674997806549074, 'td_error': 1016.7454752748573, 'value_scale': 0.6935409669407581, 'discounted_advantage': -218.3247415325422, 'initial_state': 0.5418025255203247, 'diff_eval': 8015.80050293378} step=165000
2025-12-06 16:16.01 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_165000.d3


Epoch 166/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.48it/s, critic_loss=1.43, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:16.46 [info     ] AWAC_20251206141631: epoch=166 step=166000 epoch=166 metrics={'time_sample_batch': 0.020295233964920043, 'time_algorithm_update': 0.019548054456710814, 'critic_loss': 1.4320119639635087, 'actor_loss': -5202.9391518554685, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.04014168834686279, 'td_error': 1015.4635758581437, 'value_scale': 0.7429396338059824, 'discounted_advantage': -218.74535037640027, 'initial_state': 0.7692673206329346, 'diff_eval': 8015.80050293378} step=166000
2025-12-06 16:16.46 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_166000.d3


Epoch 167/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.60it/s, critic_loss=1.43, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:17.28 [info     ] AWAC_20251206141631: epoch=167 step=167000 epoch=167 metrics={'time_sample_batch': 0.019269142389297486, 'time_algorithm_update': 0.01883996653556824, 'critic_loss': 1.431693846464157, 'actor_loss': -5202.94790625, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03839279341697693, 'td_error': 1007.3369875225367, 'value_scale': 0.7087268375753357, 'discounted_advantage': -217.78887149979064, 'initial_state': 0.6838675141334534, 'diff_eval': 8015.80050293378} step=167000
2025-12-06 16:17.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_167000.d3


Epoch 168/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.08it/s, critic_loss=1.42, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:18.11 [info     ] AWAC_20251206141631: epoch=168 step=168000 epoch=168 metrics={'time_sample_batch': 0.01975742506980896, 'time_algorithm_update': 0.019166314125061035, 'critic_loss': 1.4192400258779525, 'actor_loss': -5202.927273925781, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03919896101951599, 'td_error': 1012.928558209527, 'value_scale': 0.5510912747742969, 'discounted_advantage': -218.58528292631786, 'initial_state': 0.5457075238227844, 'diff_eval': 8015.80050293378} step=168000
2025-12-06 16:18.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_168000.d3


Epoch 169/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.21it/s, critic_loss=1.43, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:18.55 [info     ] AWAC_20251206141631: epoch=169 step=169000 epoch=169 metrics={'time_sample_batch': 0.019636999368667602, 'time_algorithm_update': 0.01902737808227539, 'critic_loss': 1.4297720267772676, 'actor_loss': -5202.940201660156, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.038945819616317746, 'td_error': 1015.3456715146191, 'value_scale': 0.7353945642472413, 'discounted_advantage': -218.98739785491318, 'initial_state': 0.7428281307220459, 'diff_eval': 8015.80050293378} step=169000
2025-12-06 16:18.55 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_169000.d3


Epoch 170/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.90it/s, critic_loss=1.44, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:19.38 [info     ] AWAC_20251206141631: epoch=170 step=170000 epoch=170 metrics={'time_sample_batch': 0.01996796989440918, 'time_algorithm_update': 0.019227081537246705, 'critic_loss': 1.4446493681669235, 'actor_loss': -5202.898653808594, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03947768998146057, 'td_error': 998.0924614279138, 'value_scale': 0.4631550646420609, 'discounted_advantage': -216.95614689294868, 'initial_state': 0.3336038291454315, 'diff_eval': 8015.80050293378} step=170000
2025-12-06 16:19.38 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_170000.d3


Epoch 171/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.23it/s, critic_loss=1.44, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:20.22 [info     ] AWAC_20251206141631: epoch=171 step=171000 epoch=171 metrics={'time_sample_batch': 0.019624888181686402, 'time_algorithm_update': 0.019010318994522095, 'critic_loss': 1.4362731404304505, 'actor_loss': -5202.910810058594, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.038917386531829835, 'td_error': 995.8188907151497, 'value_scale': 0.5467295777493711, 'discounted_advantage': -216.61245208141628, 'initial_state': 0.7113593816757202, 'diff_eval': 8015.80050293378} step=171000
2025-12-06 16:20.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_171000.d3


Epoch 172/200: 100%|██████████| 1000/1000 [00:38<00:00, 25.80it/s, critic_loss=1.44, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:21.04 [info     ] AWAC_20251206141631: epoch=172 step=172000 epoch=172 metrics={'time_sample_batch': 0.01911917519569397, 'time_algorithm_update': 0.018678991317749024, 'critic_loss': 1.440100448489189, 'actor_loss': -5202.926730957031, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03805829977989197, 'td_error': 991.8417687246518, 'value_scale': 0.28770437167078405, 'discounted_advantage': -215.93842492145305, 'initial_state': 0.546560525894165, 'diff_eval': 8015.80050293378} step=172000
2025-12-06 16:21.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_172000.d3


Epoch 173/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.37it/s, critic_loss=1.44, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:21.47 [info     ] AWAC_20251206141631: epoch=173 step=173000 epoch=173 metrics={'time_sample_batch': 0.019336708784103394, 'time_algorithm_update': 0.019017141819000244, 'critic_loss': 1.4400091999769211, 'actor_loss': -5202.937213867188, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03866250371932983, 'td_error': 998.4555269017607, 'value_scale': 0.43622552645794754, 'discounted_advantage': -216.5698351671751, 'initial_state': 0.6601970195770264, 'diff_eval': 8015.80050293378} step=173000
2025-12-06 16:21.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_173000.d3


Epoch 174/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.69it/s, critic_loss=1.44, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:22.31 [info     ] AWAC_20251206141631: epoch=174 step=174000 epoch=174 metrics={'time_sample_batch': 0.020124940633773805, 'time_algorithm_update': 0.01939077615737915, 'critic_loss': 1.442034104347229, 'actor_loss': -5202.939491210937, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03980120635032654, 'td_error': 1001.3138603636314, 'value_scale': 0.3879215893673664, 'discounted_advantage': -216.74634146427755, 'initial_state': 0.5171491503715515, 'diff_eval': 8015.80050293378} step=174000
2025-12-06 16:22.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_174000.d3


Epoch 175/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.51it/s, critic_loss=1.44, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:23.13 [info     ] AWAC_20251206141631: epoch=175 step=175000 epoch=175 metrics={'time_sample_batch': 0.01938321828842163, 'time_algorithm_update': 0.01887547016143799, 'critic_loss': 1.4434638483524322, 'actor_loss': -5202.93671484375, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.0385260226726532, 'td_error': 1011.6411384843532, 'value_scale': 0.6621781763560564, 'discounted_advantage': -218.08016766354223, 'initial_state': 0.9509247541427612, 'diff_eval': 8015.80050293378} step=175000
2025-12-06 16:23.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_175000.d3


Epoch 176/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.61it/s, critic_loss=1.45, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:23.58 [info     ] AWAC_20251206141631: epoch=176 step=176000 epoch=176 metrics={'time_sample_batch': 0.020480735778808594, 'time_algorithm_update': 0.019178576707839966, 'critic_loss': 1.4458969153165817, 'actor_loss': -5202.93656640625, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03994027280807495, 'td_error': 969.1903727800915, 'value_scale': 0.16018743003731825, 'discounted_advantage': -213.83831993121333, 'initial_state': 0.442730188369751, 'diff_eval': 8015.80050293378} step=176000
2025-12-06 16:23.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_176000.d3


Epoch 177/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.31it/s, critic_loss=1.46, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:24.43 [info     ] AWAC_20251206141631: epoch=177 step=177000 epoch=177 metrics={'time_sample_batch': 0.02017069435119629, 'time_algorithm_update': 0.0199688241481781, 'critic_loss': 1.4589260939359665, 'actor_loss': -5202.937924316407, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.040429383993148806, 'td_error': 992.2873552375374, 'value_scale': 0.3507378086508607, 'discounted_advantage': -216.2393304205523, 'initial_state': 0.6945512294769287, 'diff_eval': 8015.80050293378} step=177000
2025-12-06 16:24.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_177000.d3


Epoch 178/200: 100%|██████████| 1000/1000 [00:38<00:00, 25.96it/s, critic_loss=1.45, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:25.25 [info     ] AWAC_20251206141631: epoch=178 step=178000 epoch=178 metrics={'time_sample_batch': 0.019050690412521363, 'time_algorithm_update': 0.018491748094558717, 'critic_loss': 1.450065175294876, 'actor_loss': -5202.940354003907, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.0378191876411438, 'td_error': 997.0170573081367, 'value_scale': 0.4852224121784381, 'discounted_advantage': -216.80227102334695, 'initial_state': 0.9469814300537109, 'diff_eval': 8015.80050293378} step=178000
2025-12-06 16:25.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_178000.d3


Epoch 179/200: 100%|██████████| 1000/1000 [00:38<00:00, 25.82it/s, critic_loss=1.46, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:26.07 [info     ] AWAC_20251206141631: epoch=179 step=179000 epoch=179 metrics={'time_sample_batch': 0.019121786832809448, 'time_algorithm_update': 0.018655415534973144, 'critic_loss': 1.4551658296585084, 'actor_loss': -5202.943809082031, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03805132699012757, 'td_error': 1024.684140606395, 'value_scale': 0.7185525625353961, 'discounted_advantage': -219.59619756670244, 'initial_state': 1.173258900642395, 'diff_eval': 8015.80050293378} step=179000
2025-12-06 16:26.07 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_179000.d3


Epoch 180/200: 100%|██████████| 1000/1000 [00:38<00:00, 25.90it/s, critic_loss=1.46, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:26.49 [info     ] AWAC_20251206141631: epoch=180 step=180000 epoch=180 metrics={'time_sample_batch': 0.018910127639770506, 'time_algorithm_update': 0.018740261793136596, 'critic_loss': 1.4638280564546584, 'actor_loss': -5202.942134277344, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.0379266676902771, 'td_error': 993.7928154862379, 'value_scale': 0.23913647011484504, 'discounted_advantage': -215.96732390136657, 'initial_state': 0.7590813636779785, 'diff_eval': 8015.80050293378} step=180000
2025-12-06 16:26.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_180000.d3


Epoch 181/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.58it/s, critic_loss=1.46, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:27.31 [info     ] AWAC_20251206141631: epoch=181 step=181000 epoch=181 metrics={'time_sample_batch': 0.019458882808685303, 'time_algorithm_update': 0.018684706687927245, 'critic_loss': 1.4607441668510437, 'actor_loss': -5202.940263183594, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03842093300819397, 'td_error': 989.3472206843986, 'value_scale': 0.4636170086368906, 'discounted_advantage': -215.9515948451892, 'initial_state': 0.9868624806404114, 'diff_eval': 8015.80050293378} step=181000
2025-12-06 16:27.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_181000.d3


Epoch 182/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.26it/s, critic_loss=1.47, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:28.14 [info     ] AWAC_20251206141631: epoch=182 step=182000 epoch=182 metrics={'time_sample_batch': 0.019628576040267944, 'time_algorithm_update': 0.0189645938873291, 'critic_loss': 1.4682401938438416, 'actor_loss': -5202.963234375, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03888435626029968, 'td_error': 1017.2537512389944, 'value_scale': 0.7237941500858059, 'discounted_advantage': -218.6843965400798, 'initial_state': 1.4229296445846558, 'diff_eval': 8015.80050293378} step=182000
2025-12-06 16:28.15 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_182000.d3


Epoch 183/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.44it/s, critic_loss=1.47, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:28.57 [info     ] AWAC_20251206141631: epoch=183 step=183000 epoch=183 metrics={'time_sample_batch': 0.019513537168502806, 'time_algorithm_update': 0.01878555965423584, 'critic_loss': 1.465094614624977, 'actor_loss': -5202.966844726562, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03858740615844727, 'td_error': 961.2904733600575, 'value_scale': -0.11535457521520887, 'discounted_advantage': -212.9806862391063, 'initial_state': 0.4836772084236145, 'diff_eval': 8015.80050293378} step=183000
2025-12-06 16:28.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_183000.d3


Epoch 184/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.57it/s, critic_loss=1.46, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:29.40 [info     ] AWAC_20251206141631: epoch=184 step=184000 epoch=184 metrics={'time_sample_batch': 0.0193267867565155, 'time_algorithm_update': 0.01878863716125488, 'critic_loss': 1.4584410252571105, 'actor_loss': -5202.964852050782, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03839518690109253, 'td_error': 1002.279322166663, 'value_scale': 0.6284978981184471, 'discounted_advantage': -217.1453693416744, 'initial_state': 1.2571156024932861, 'diff_eval': 8015.80050293378} step=184000
2025-12-06 16:29.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_184000.d3


Epoch 185/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.41it/s, critic_loss=1.47, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:30.23 [info     ] AWAC_20251206141631: epoch=185 step=185000 epoch=185 metrics={'time_sample_batch': 0.01944235825538635, 'time_algorithm_update': 0.018934258699417113, 'critic_loss': 1.4665179470777512, 'actor_loss': -5202.9702109375, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.038661698579788205, 'td_error': 989.7062396157598, 'value_scale': 0.2451359608186423, 'discounted_advantage': -216.1187067182612, 'initial_state': 0.8145126104354858, 'diff_eval': 8015.80050293378} step=185000
2025-12-06 16:30.23 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_185000.d3


Epoch 186/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.61it/s, critic_loss=1.48, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:31.09 [info     ] AWAC_20251206141631: epoch=186 step=186000 epoch=186 metrics={'time_sample_batch': 0.02100295376777649, 'time_algorithm_update': 0.01979188060760498, 'critic_loss': 1.483484512090683, 'actor_loss': -5202.967442382813, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.041052171468734744, 'td_error': 1004.9294455726587, 'value_scale': 0.6167095110362141, 'discounted_advantage': -217.55400838553714, 'initial_state': 1.2114930152893066, 'diff_eval': 8015.80050293378} step=186000
2025-12-06 16:31.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_186000.d3


Epoch 187/200: 100%|██████████| 1000/1000 [00:38<00:00, 26.02it/s, critic_loss=1.47, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:31.50 [info     ] AWAC_20251206141631: epoch=187 step=187000 epoch=187 metrics={'time_sample_batch': 0.018673841238021852, 'time_algorithm_update': 0.0187632040977478, 'critic_loss': 1.4739761357307435, 'actor_loss': -5202.972090332031, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03770423817634583, 'td_error': 1023.6334881140095, 'value_scale': 0.8823480061196847, 'discounted_advantage': -219.3488805811591, 'initial_state': 1.735967755317688, 'diff_eval': 8015.80050293378} step=187000
2025-12-06 16:31.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_187000.d3


Epoch 188/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.44it/s, critic_loss=1.48, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:32.37 [info     ] AWAC_20251206141631: epoch=188 step=188000 epoch=188 metrics={'time_sample_batch': 0.021293527126312255, 'time_algorithm_update': 0.020304426193237305, 'critic_loss': 1.4809836539030075, 'actor_loss': -5202.979033691407, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.041904319286346435, 'td_error': 986.5477299721613, 'value_scale': 0.3196727994603807, 'discounted_advantage': -215.42018810284708, 'initial_state': 1.103366494178772, 'diff_eval': 8015.80050293378} step=188000
2025-12-06 16:32.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_188000.d3


Epoch 189/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.77it/s, critic_loss=1.46, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:33.21 [info     ] AWAC_20251206141631: epoch=189 step=189000 epoch=189 metrics={'time_sample_batch': 0.019548428773880005, 'time_algorithm_update': 0.019840697526931763, 'critic_loss': 1.4609644253253937, 'actor_loss': -5202.978862792968, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03968416142463684, 'td_error': 992.311230322019, 'value_scale': 0.44907374238893577, 'discounted_advantage': -215.9559766437773, 'initial_state': 1.206140160560608, 'diff_eval': 8015.80050293378} step=189000
2025-12-06 16:33.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_189000.d3


Epoch 190/200: 100%|██████████| 1000/1000 [00:38<00:00, 26.02it/s, critic_loss=1.47, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:34.03 [info     ] AWAC_20251206141631: epoch=190 step=190000 epoch=190 metrics={'time_sample_batch': 0.018897292613983156, 'time_algorithm_update': 0.018570058345794678, 'critic_loss': 1.4672414045333861, 'actor_loss': -5202.987094726563, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03775102567672729, 'td_error': 1017.8110802097096, 'value_scale': 0.7773712756219948, 'discounted_advantage': -218.8988655856431, 'initial_state': 1.6127729415893555, 'diff_eval': 8015.80050293378} step=190000
2025-12-06 16:34.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_190000.d3


Epoch 191/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.82it/s, critic_loss=1.46, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:34.47 [info     ] AWAC_20251206141631: epoch=191 step=191000 epoch=191 metrics={'time_sample_batch': 0.02016146421432495, 'time_algorithm_update': 0.019113388299942018, 'critic_loss': 1.4600537139177323, 'actor_loss': -5202.977224609375, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.039561764001846314, 'td_error': 1011.9990787195875, 'value_scale': 0.6060498010828522, 'discounted_advantage': -218.27448757488278, 'initial_state': 1.4334744215011597, 'diff_eval': 8015.80050293378} step=191000
2025-12-06 16:34.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_191000.d3


Epoch 192/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.16it/s, critic_loss=1.47, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:35.30 [info     ] AWAC_20251206141631: epoch=192 step=192000 epoch=192 metrics={'time_sample_batch': 0.019756475925445556, 'time_algorithm_update': 0.01901654028892517, 'critic_loss': 1.4739899982213973, 'actor_loss': -5202.9835522460935, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.039050495147705075, 'td_error': 1026.6839487815564, 'value_scale': 0.7771239825567325, 'discounted_advantage': -219.74115728140757, 'initial_state': 1.5435707569122314, 'diff_eval': 8015.80050293378} step=192000
2025-12-06 16:35.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_192000.d3


Epoch 193/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.21it/s, critic_loss=1.48, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:36.13 [info     ] AWAC_20251206141631: epoch=193 step=193000 epoch=193 metrics={'time_sample_batch': 0.019675870180130006, 'time_algorithm_update': 0.01901964831352234, 'critic_loss': 1.4745900210142135, 'actor_loss': -5202.984649414063, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03896396517753601, 'td_error': 996.1317430337888, 'value_scale': 0.42648447539785383, 'discounted_advantage': -216.522419574242, 'initial_state': 1.2310397624969482, 'diff_eval': 8015.80050293378} step=193000
2025-12-06 16:36.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_193000.d3


Epoch 194/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.25it/s, critic_loss=1.48, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:36.56 [info     ] AWAC_20251206141631: epoch=194 step=194000 epoch=194 metrics={'time_sample_batch': 0.019789839744567873, 'time_algorithm_update': 0.01887079644203186, 'critic_loss': 1.4797153722047807, 'actor_loss': -5202.98268359375, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03893946385383606, 'td_error': 1002.5496496664769, 'value_scale': 0.3862703335678325, 'discounted_advantage': -217.2500008342544, 'initial_state': 1.1935136318206787, 'diff_eval': 8015.80050293378} step=194000
2025-12-06 16:36.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_194000.d3


Epoch 195/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.13it/s, critic_loss=1.48, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:37.39 [info     ] AWAC_20251206141631: epoch=195 step=195000 epoch=195 metrics={'time_sample_batch': 0.01966070628166199, 'time_algorithm_update': 0.019132516384124755, 'critic_loss': 1.4805223579406739, 'actor_loss': -5202.984477539063, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03906570506095886, 'td_error': 1000.9739352323913, 'value_scale': 0.3718546835552629, 'discounted_advantage': -216.94845376625236, 'initial_state': 1.139794111251831, 'diff_eval': 8015.80050293378} step=195000
2025-12-06 16:37.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_195000.d3


Epoch 196/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.35it/s, critic_loss=1.47, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:38.24 [info     ] AWAC_20251206141631: epoch=196 step=196000 epoch=196 metrics={'time_sample_batch': 0.02040447998046875, 'time_algorithm_update': 0.019676211833953856, 'critic_loss': 1.4713149521350861, 'actor_loss': -5202.982575195312, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.04036860942840576, 'td_error': 982.2888965421541, 'value_scale': 0.29358493604705876, 'discounted_advantage': -215.04585131411167, 'initial_state': 1.0995469093322754, 'diff_eval': 8015.80050293378} step=196000
2025-12-06 16:38.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_196000.d3


Epoch 197/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.56it/s, critic_loss=1.47, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:39.08 [info     ] AWAC_20251206141631: epoch=197 step=197000 epoch=197 metrics={'time_sample_batch': 0.02013974404335022, 'time_algorithm_update': 0.01958701181411743, 'critic_loss': 1.470541587293148, 'actor_loss': -5202.988366210938, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.040010431289672854, 'td_error': 1007.9856646497701, 'value_scale': 0.5395535616639748, 'discounted_advantage': -217.5765252130716, 'initial_state': 1.427209734916687, 'diff_eval': 8015.80050293378} step=197000
2025-12-06 16:39.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_197000.d3


Epoch 198/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.05it/s, critic_loss=1.46, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:39.53 [info     ] AWAC_20251206141631: epoch=198 step=198000 epoch=198 metrics={'time_sample_batch': 0.020963476896286012, 'time_algorithm_update': 0.019632791519165037, 'critic_loss': 1.462218337059021, 'actor_loss': -5202.989282714844, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.04092094349861145, 'td_error': 996.5236824132176, 'value_scale': 0.3770542193987943, 'discounted_advantage': -216.5345633788276, 'initial_state': 1.205611228942871, 'diff_eval': 8015.80050293378} step=198000
2025-12-06 16:39.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_198000.d3


Epoch 199/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.62it/s, critic_loss=1.47, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:40.36 [info     ] AWAC_20251206141631: epoch=199 step=199000 epoch=199 metrics={'time_sample_batch': 0.019178836345672606, 'time_algorithm_update': 0.018914613962173463, 'critic_loss': 1.4675270391702653, 'actor_loss': -5202.992203125, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.038359215259552, 'td_error': 1007.4861813820582, 'value_scale': 0.520586630792809, 'discounted_advantage': -217.62521532207742, 'initial_state': 1.3269466161727905, 'diff_eval': 8015.80050293378} step=199000
2025-12-06 16:40.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_199000.d3


Epoch 200/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.61it/s, critic_loss=1.46, actor_loss=-5.2e+3, temp=0, temp_loss=0]


2025-12-06 16:41.20 [info     ] AWAC_20251206141631: epoch=200 step=200000 epoch=200 metrics={'time_sample_batch': 0.020452164888381957, 'time_algorithm_update': 0.019205561876296996, 'critic_loss': 1.4563235750198364, 'actor_loss': -5202.996069335937, 'temp': 0.0, 'temp_loss': 0.0, 'time_step': 0.03993913769721985, 'td_error': 979.6527619126267, 'value_scale': 0.1414335265338783, 'discounted_advantage': -214.95722112389203, 'initial_state': 1.0240145921707153, 'diff_eval': 8015.80050293378} step=200000
2025-12-06 16:41.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\AWAC_20251206141631\model_200000.d3
Training model:  PLAS
2025-12-06 16:41.20 [info     ] dataset info                   dataset_info=DatasetInfo(observation_signature=Signature(dtype=[dtype('float64')], shape=[(7,)]), action_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.CONTINUOUS: 1>, action_size=1)


Epoch 1/200: 100%|██████████| 1000/1000 [00:10<00:00, 92.91it/s, vae_loss=0.0623]


2025-12-06 16:41.35 [info     ] PLAS_20251206164120: epoch=1 step=1000 epoch=1 metrics={'time_sample_batch': 0.0019546515941619874, 'time_algorithm_update': 0.007473944664001465, 'vae_loss': 0.06214243583567441, 'time_step': 0.010555868625640868, 'td_error': 0.4490813122817245, 'value_scale': 0.04522359883100456, 'discounted_advantage': 0.0017990519226189542, 'initial_state': 0.06900926679372787, 'diff_eval': 2819.0513736533967} step=1000
2025-12-06 16:41.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_1000.d3


Epoch 2/200: 100%|██████████| 1000/1000 [00:10<00:00, 94.78it/s, vae_loss=0.0404]


2025-12-06 16:41.49 [info     ] PLAS_20251206164120: epoch=2 step=2000 epoch=2 metrics={'time_sample_batch': 0.0019423949718475342, 'time_algorithm_update': 0.00815382719039917, 'vae_loss': 0.04034044554457068, 'time_step': 0.010345171213150024, 'td_error': 0.4489196663899766, 'value_scale': 0.045012461446609725, 'discounted_advantage': 0.0028787639053472263, 'initial_state': 0.06854189187288284, 'diff_eval': 2677.818849683355} step=2000
2025-12-06 16:41.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_2000.d3


Epoch 3/200: 100%|██████████| 1000/1000 [00:10<00:00, 92.32it/s, vae_loss=0.0396]


2025-12-06 16:42.04 [info     ] PLAS_20251206164120: epoch=3 step=3000 epoch=3 metrics={'time_sample_batch': 0.0030437307357788087, 'time_algorithm_update': 0.00733078145980835, 'vae_loss': 0.039564672611653806, 'time_step': 0.010631665706634522, 'td_error': 0.4491051548224633, 'value_scale': 0.04528406741096008, 'discounted_advantage': 0.0015404997553998545, 'initial_state': 0.06920059770345688, 'diff_eval': 2565.0195161570923} step=3000
2025-12-06 16:42.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_3000.d3


Epoch 4/200: 100%|██████████| 1000/1000 [00:09<00:00, 104.37it/s, vae_loss=0.0378]


2025-12-06 16:42.17 [info     ] PLAS_20251206164120: epoch=4 step=4000 epoch=4 metrics={'time_sample_batch': 0.0018651156425476074, 'time_algorithm_update': 0.007335358381271363, 'vae_loss': 0.037782444313168524, 'time_step': 0.00940499997138977, 'td_error': 0.44897799702710534, 'value_scale': 0.045162951715808905, 'discounted_advantage': 0.002024589090423282, 'initial_state': 0.06892845034599304, 'diff_eval': 2484.2998026141895} step=4000
2025-12-06 16:42.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_4000.d3


Epoch 5/200: 100%|██████████| 1000/1000 [00:09<00:00, 100.87it/s, vae_loss=0.0372]


2025-12-06 16:42.31 [info     ] PLAS_20251206164120: epoch=5 step=5000 epoch=5 metrics={'time_sample_batch': 0.001974078416824341, 'time_algorithm_update': 0.007512960195541382, 'vae_loss': 0.037226272078230975, 'time_step': 0.009724540710449219, 'td_error': 0.4492336424898638, 'value_scale': 0.045484853728868735, 'discounted_advantage': 0.0008336814154626655, 'initial_state': 0.0697893425822258, 'diff_eval': 2457.2730911234225} step=5000
2025-12-06 16:42.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_5000.d3


Epoch 6/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.01it/s, vae_loss=0.0372]


2025-12-06 16:42.45 [info     ] PLAS_20251206164120: epoch=6 step=6000 epoch=6 metrics={'time_sample_batch': 0.0019133620262145996, 'time_algorithm_update': 0.0073649539947509765, 'vae_loss': 0.03729020105674863, 'time_step': 0.009518669366836548, 'td_error': 0.44898529118446234, 'value_scale': 0.04523792269905875, 'discounted_advantage': 0.0017924722667165725, 'initial_state': 0.06929811835289001, 'diff_eval': 2349.048460700912} step=6000
2025-12-06 16:42.45 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_6000.d3


Epoch 7/200: 100%|██████████| 1000/1000 [00:10<00:00, 99.52it/s, vae_loss=0.0366]


2025-12-06 16:42.59 [info     ] PLAS_20251206164120: epoch=7 step=7000 epoch=7 metrics={'time_sample_batch': 0.0020358140468597413, 'time_algorithm_update': 0.007584977388381958, 'vae_loss': 0.03655791226401925, 'time_step': 0.009857437372207642, 'td_error': 0.44916488056896525, 'value_scale': 0.04540793991343985, 'discounted_advantage': 0.0009239002189621871, 'initial_state': 0.06948970258235931, 'diff_eval': 2284.8012437132975} step=7000
2025-12-06 16:42.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_7000.d3


Epoch 8/200: 100%|██████████| 1000/1000 [00:10<00:00, 95.05it/s, vae_loss=0.0356]


2025-12-06 16:43.13 [info     ] PLAS_20251206164120: epoch=8 step=8000 epoch=8 metrics={'time_sample_batch': 0.002112502574920654, 'time_algorithm_update': 0.007910866498947143, 'vae_loss': 0.03563984011299908, 'time_step': 0.010288310766220093, 'td_error': 0.44891206667387196, 'value_scale': 0.04514601182860806, 'discounted_advantage': 0.0020679978654227897, 'initial_state': 0.0689234510064125, 'diff_eval': 2122.8440593711516} step=8000
2025-12-06 16:43.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_8000.d3


Epoch 9/200: 100%|██████████| 1000/1000 [00:09<00:00, 105.04it/s, vae_loss=0.0343]


2025-12-06 16:43.26 [info     ] PLAS_20251206164120: epoch=9 step=9000 epoch=9 metrics={'time_sample_batch': 0.0018669908046722412, 'time_algorithm_update': 0.007278560876846313, 'vae_loss': 0.034332078458741304, 'time_step': 0.00935345721244812, 'td_error': 0.44922198946939734, 'value_scale': 0.0455431892358591, 'discounted_advantage': 0.0006496453690129999, 'initial_state': 0.06996466219425201, 'diff_eval': 2014.6241393102714} step=9000
2025-12-06 16:43.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_9000.d3


Epoch 10/200: 100%|██████████| 1000/1000 [00:09<00:00, 104.64it/s, vae_loss=0.0334]


2025-12-06 16:43.40 [info     ] PLAS_20251206164120: epoch=10 step=10000 epoch=10 metrics={'time_sample_batch': 0.0018569185733795165, 'time_algorithm_update': 0.007299048900604248, 'vae_loss': 0.03341761047951877, 'time_step': 0.009371090173721314, 'td_error': 0.4489757496038536, 'value_scale': 0.04528667421563398, 'discounted_advantage': 0.0017047286244255267, 'initial_state': 0.06954267621040344, 'diff_eval': 1864.515518379864} step=10000
2025-12-06 16:43.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_10000.d3


Epoch 11/200: 100%|██████████| 1000/1000 [00:10<00:00, 99.04it/s, vae_loss=0.0323]


2025-12-06 16:43.54 [info     ] PLAS_20251206164120: epoch=11 step=11000 epoch=11 metrics={'time_sample_batch': 0.0020487446784973147, 'time_algorithm_update': 0.00757669448852539, 'vae_loss': 0.03226334763411433, 'time_step': 0.009891968250274658, 'td_error': 0.44924728529268215, 'value_scale': 0.045593713713093204, 'discounted_advantage': 0.00013556545863912105, 'initial_state': 0.0697389543056488, 'diff_eval': 1720.1780165841203} step=11000
2025-12-06 16:43.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_11000.d3


Epoch 12/200: 100%|██████████| 1000/1000 [00:09<00:00, 100.62it/s, vae_loss=0.0318]


2025-12-06 16:44.08 [info     ] PLAS_20251206164120: epoch=12 step=12000 epoch=12 metrics={'time_sample_batch': 0.001982271909713745, 'time_algorithm_update': 0.007497200965881348, 'vae_loss': 0.031799736447632315, 'time_step': 0.009736815929412841, 'td_error': 0.44896139948466335, 'value_scale': 0.04524116675281228, 'discounted_advantage': 0.0017521237336697397, 'initial_state': 0.06907899677753448, 'diff_eval': 1605.658470111926} step=12000
2025-12-06 16:44.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_12000.d3


Epoch 13/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.92it/s, vae_loss=0.031]


2025-12-06 16:44.21 [info     ] PLAS_20251206164120: epoch=13 step=13000 epoch=13 metrics={'time_sample_batch': 0.0019232845306396485, 'time_algorithm_update': 0.007479674577713012, 'vae_loss': 0.031019429080188274, 'time_step': 0.009624557018280029, 'td_error': 0.4491346210402187, 'value_scale': 0.045441541487034116, 'discounted_advantage': 0.0009437540399049763, 'initial_state': 0.06937158852815628, 'diff_eval': 1495.4967912680538} step=13000
2025-12-06 16:44.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_13000.d3


Epoch 14/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.66it/s, vae_loss=0.0302]


2025-12-06 16:44.35 [info     ] PLAS_20251206164120: epoch=14 step=14000 epoch=14 metrics={'time_sample_batch': 0.0019063239097595215, 'time_algorithm_update': 0.0074210169315338135, 'vae_loss': 0.030197296649217605, 'time_step': 0.009552248716354371, 'td_error': 0.44880168497443435, 'value_scale': 0.04506889360739045, 'discounted_advantage': 0.0023420475828019034, 'initial_state': 0.0685848817229271, 'diff_eval': 1468.344610978246} step=14000
2025-12-06 16:44.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_14000.d3


Epoch 15/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.93it/s, vae_loss=0.0297]


2025-12-06 16:44.49 [info     ] PLAS_20251206164120: epoch=15 step=15000 epoch=15 metrics={'time_sample_batch': 0.001910541534423828, 'time_algorithm_update': 0.007383857488632202, 'vae_loss': 0.02966148935817182, 'time_step': 0.009529402494430543, 'td_error': 0.4489110244411826, 'value_scale': 0.045175922652027335, 'discounted_advantage': 0.0018834008368070116, 'initial_state': 0.06872507929801941, 'diff_eval': 1423.4434482023946} step=15000
2025-12-06 16:44.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_15000.d3


Epoch 16/200: 100%|██████████| 1000/1000 [00:09<00:00, 104.86it/s, vae_loss=0.0287]


2025-12-06 16:45.02 [info     ] PLAS_20251206164120: epoch=16 step=16000 epoch=16 metrics={'time_sample_batch': 0.0018989932537078857, 'time_algorithm_update': 0.0072217538356781005, 'vae_loss': 0.02870969265792519, 'time_step': 0.009343298673629761, 'td_error': 0.44910336812833507, 'value_scale': 0.045400039310436714, 'discounted_advantage': 0.0012873549121591142, 'initial_state': 0.06943002343177795, 'diff_eval': 1271.8511607807588} step=16000
2025-12-06 16:45.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_16000.d3


Epoch 17/200: 100%|██████████| 1000/1000 [00:09<00:00, 104.71it/s, vae_loss=0.0285]


2025-12-06 16:45.15 [info     ] PLAS_20251206164120: epoch=17 step=17000 epoch=17 metrics={'time_sample_batch': 0.001895963191986084, 'time_algorithm_update': 0.007257322072982788, 'vae_loss': 0.028521363593637942, 'time_step': 0.009366107940673827, 'td_error': 0.4490321230688698, 'value_scale': 0.04535130747068558, 'discounted_advantage': 0.0014236034688214578, 'initial_state': 0.06938078999519348, 'diff_eval': 1255.7474964415653} step=17000
2025-12-06 16:45.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_17000.d3


Epoch 18/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.72it/s, vae_loss=0.0278]


2025-12-06 16:45.29 [info     ] PLAS_20251206164120: epoch=18 step=18000 epoch=18 metrics={'time_sample_batch': 0.0019233691692352294, 'time_algorithm_update': 0.007373791694641113, 'vae_loss': 0.027804303519427778, 'time_step': 0.009542551517486573, 'td_error': 0.44913787554925316, 'value_scale': 0.045399000152339726, 'discounted_advantage': 0.0011494096388320178, 'initial_state': 0.06890073418617249, 'diff_eval': 1169.6255583546936} step=18000
2025-12-06 16:45.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_18000.d3


Epoch 19/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.78it/s, vae_loss=0.0271]


2025-12-06 16:45.43 [info     ] PLAS_20251206164120: epoch=19 step=19000 epoch=19 metrics={'time_sample_batch': 0.0019482705593109131, 'time_algorithm_update': 0.007383290290832519, 'vae_loss': 0.027109891465865076, 'time_step': 0.00954929280281067, 'td_error': 0.4490561547052422, 'value_scale': 0.04525482545006468, 'discounted_advantage': 0.0016176087010207033, 'initial_state': 0.06853315234184265, 'diff_eval': 1159.799883713614} step=19000
2025-12-06 16:45.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_19000.d3


Epoch 20/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.49it/s, vae_loss=0.0267]


2025-12-06 16:45.56 [info     ] PLAS_20251206164120: epoch=20 step=20000 epoch=20 metrics={'time_sample_batch': 0.0019609994888305665, 'time_algorithm_update': 0.007457582950592041, 'vae_loss': 0.026688940311782063, 'time_step': 0.00966119909286499, 'td_error': 0.44906560417451313, 'value_scale': 0.04529278197115769, 'discounted_advantage': 0.0016002128188086842, 'initial_state': 0.06866186857223511, 'diff_eval': 1143.4839332792822} step=20000
2025-12-06 16:45.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_20000.d3


Epoch 21/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.46it/s, vae_loss=0.0265]


2025-12-06 16:46.10 [info     ] PLAS_20251206164120: epoch=21 step=21000 epoch=21 metrics={'time_sample_batch': 0.0019347176551818847, 'time_algorithm_update': 0.007414131641387939, 'vae_loss': 0.026508107744157315, 'time_step': 0.00957007598876953, 'td_error': 0.4492747454610935, 'value_scale': 0.04555274935459526, 'discounted_advantage': 0.0004491828617087803, 'initial_state': 0.06941503286361694, 'diff_eval': 1123.9866841296262} step=21000
2025-12-06 16:46.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_21000.d3


Epoch 22/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.67it/s, vae_loss=0.0257]


2025-12-06 16:46.24 [info     ] PLAS_20251206164120: epoch=22 step=22000 epoch=22 metrics={'time_sample_batch': 0.0019425866603851318, 'time_algorithm_update': 0.00748484206199646, 'vae_loss': 0.025689377718605102, 'time_step': 0.009650068044662476, 'td_error': 0.4493452544760381, 'value_scale': 0.045541213318699314, 'discounted_advantage': 0.00042631773770646694, 'initial_state': 0.06872087717056274, 'diff_eval': 1031.292341105334} step=22000
2025-12-06 16:46.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_22000.d3


Epoch 23/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.71it/s, vae_loss=0.025]


2025-12-06 16:46.37 [info     ] PLAS_20251206164120: epoch=23 step=23000 epoch=23 metrics={'time_sample_batch': 0.001886840581893921, 'time_algorithm_update': 0.007311299324035645, 'vae_loss': 0.025039393457584082, 'time_step': 0.00944992709159851, 'td_error': 0.44931638717860056, 'value_scale': 0.045543554826808996, 'discounted_advantage': 0.00042721845940568113, 'initial_state': 0.06912310421466827, 'diff_eval': 1102.6949801853173} step=23000
2025-12-06 16:46.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_23000.d3


Epoch 24/200: 100%|██████████| 1000/1000 [00:10<00:00, 99.38it/s, vae_loss=0.025] 


2025-12-06 16:46.51 [info     ] PLAS_20251206164120: epoch=24 step=24000 epoch=24 metrics={'time_sample_batch': 0.002013033628463745, 'time_algorithm_update': 0.007609110832214355, 'vae_loss': 0.025003905423916877, 'time_step': 0.009862987279891967, 'td_error': 0.44948634314822256, 'value_scale': 0.04575450835423193, 'discounted_advantage': -0.0006341612407361766, 'initial_state': 0.06929794698953629, 'diff_eval': 1007.7017881372732} step=24000
2025-12-06 16:46.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_24000.d3


Epoch 25/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.71it/s, vae_loss=0.0244]


2025-12-06 16:47.04 [info     ] PLAS_20251206164120: epoch=25 step=25000 epoch=25 metrics={'time_sample_batch': 0.0019319813251495361, 'time_algorithm_update': 0.007297087669372559, 'vae_loss': 0.024376224051229654, 'time_step': 0.009450958728790284, 'td_error': 0.44915166826938085, 'value_scale': 0.045406175055209316, 'discounted_advantage': 0.0011413289642040144, 'initial_state': 0.06895501166582108, 'diff_eval': 993.3331954053243} step=25000
2025-12-06 16:47.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_25000.d3


Epoch 26/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.68it/s, vae_loss=0.024]


2025-12-06 16:47.18 [info     ] PLAS_20251206164120: epoch=26 step=26000 epoch=26 metrics={'time_sample_batch': 0.0019144201278686523, 'time_algorithm_update': 0.00739832067489624, 'vae_loss': 0.024027573755942284, 'time_step': 0.009552424907684326, 'td_error': 0.4493761865301651, 'value_scale': 0.04566749196877424, 'discounted_advantage': 0.000260086539603492, 'initial_state': 0.06978253275156021, 'diff_eval': 1036.6551651823006} step=26000
2025-12-06 16:47.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_26000.d3


Epoch 27/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.71it/s, vae_loss=0.0239]


2025-12-06 16:47.32 [info     ] PLAS_20251206164120: epoch=27 step=27000 epoch=27 metrics={'time_sample_batch': 0.0019192001819610595, 'time_algorithm_update': 0.007389405965805054, 'vae_loss': 0.023940110908821224, 'time_step': 0.009546308040618897, 'td_error': 0.4494036635035973, 'value_scale': 0.04563082746961541, 'discounted_advantage': -7.496491208085858e-05, 'initial_state': 0.06921519339084625, 'diff_eval': 993.777009009501} step=27000
2025-12-06 16:47.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_27000.d3


Epoch 28/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.31it/s, vae_loss=0.0233]


2025-12-06 16:47.45 [info     ] PLAS_20251206164120: epoch=28 step=28000 epoch=28 metrics={'time_sample_batch': 0.001923877477645874, 'time_algorithm_update': 0.007399231195449829, 'vae_loss': 0.023247162864543498, 'time_step': 0.009573065042495728, 'td_error': 0.44927378077145413, 'value_scale': 0.045511160461561506, 'discounted_advantage': 0.0007297031170783848, 'initial_state': 0.0688871368765831, 'diff_eval': 929.8660317292495} step=28000
2025-12-06 16:47.45 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_28000.d3


Epoch 29/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.86it/s, vae_loss=0.0234]


2025-12-06 16:47.59 [info     ] PLAS_20251206164120: epoch=29 step=29000 epoch=29 metrics={'time_sample_batch': 0.001951740026473999, 'time_algorithm_update': 0.00736257004737854, 'vae_loss': 0.023368184251710774, 'time_step': 0.00954674220085144, 'td_error': 0.44933046737430976, 'value_scale': 0.04555196393896378, 'discounted_advantage': 0.00032452037503184815, 'initial_state': 0.06907866150140762, 'diff_eval': 954.8382342513003} step=29000
2025-12-06 16:47.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_29000.d3


Epoch 30/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.56it/s, vae_loss=0.023]


2025-12-06 16:48.13 [info     ] PLAS_20251206164120: epoch=30 step=30000 epoch=30 metrics={'time_sample_batch': 0.0019393811225891114, 'time_algorithm_update': 0.007467697858810424, 'vae_loss': 0.022953787241131066, 'time_step': 0.009644787073135376, 'td_error': 0.44921856410720024, 'value_scale': 0.045424780601515356, 'discounted_advantage': 0.000840296556016666, 'initial_state': 0.06839894503355026, 'diff_eval': 936.909631776844} step=30000
2025-12-06 16:48.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_30000.d3


Epoch 31/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.54it/s, vae_loss=0.0222]


2025-12-06 16:48.26 [info     ] PLAS_20251206164120: epoch=31 step=31000 epoch=31 metrics={'time_sample_batch': 0.001964003324508667, 'time_algorithm_update': 0.007456631183624268, 'vae_loss': 0.022167908603325487, 'time_step': 0.009659151315689087, 'td_error': 0.44947780615520916, 'value_scale': 0.04575608251967654, 'discounted_advantage': -0.0006936429655736138, 'initial_state': 0.06952383369207382, 'diff_eval': 962.162260943859} step=31000
2025-12-06 16:48.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_31000.d3


Epoch 32/200: 100%|██████████| 1000/1000 [00:10<00:00, 99.51it/s, vae_loss=0.022] 


2025-12-06 16:48.40 [info     ] PLAS_20251206164120: epoch=32 step=32000 epoch=32 metrics={'time_sample_batch': 0.0020408122539520264, 'time_algorithm_update': 0.007567123413085937, 'vae_loss': 0.021959095017053187, 'time_step': 0.009851840019226074, 'td_error': 0.44898443978620006, 'value_scale': 0.045189656880477325, 'discounted_advantage': 0.0017463600857349024, 'initial_state': 0.06858053058385849, 'diff_eval': 1091.1473242853435} step=32000
2025-12-06 16:48.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_32000.d3


Epoch 33/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.55it/s, vae_loss=0.0221]


2025-12-06 16:48.54 [info     ] PLAS_20251206164120: epoch=33 step=33000 epoch=33 metrics={'time_sample_batch': 0.0018879499435424806, 'time_algorithm_update': 0.007326193809509277, 'vae_loss': 0.022224908396601677, 'time_step': 0.009461743116378785, 'td_error': 0.44930751736529223, 'value_scale': 0.04554192823016733, 'discounted_advantage': 0.0003524108094191148, 'initial_state': 0.06923965364694595, 'diff_eval': 957.2063403966902} step=33000
2025-12-06 16:48.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_33000.d3


Epoch 34/200: 100%|██████████| 1000/1000 [00:09<00:00, 100.84it/s, vae_loss=0.0217]


2025-12-06 16:49.07 [info     ] PLAS_20251206164120: epoch=34 step=34000 epoch=34 metrics={'time_sample_batch': 0.0019506859779357911, 'time_algorithm_update': 0.007514474391937256, 'vae_loss': 0.021684561305679383, 'time_step': 0.009712430238723755, 'td_error': 0.4493117986889779, 'value_scale': 0.045545110888451285, 'discounted_advantage': 0.00040809473666753645, 'initial_state': 0.06903594732284546, 'diff_eval': 920.4642759588879} step=34000
2025-12-06 16:49.07 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_34000.d3


Epoch 35/200: 100%|██████████| 1000/1000 [00:10<00:00, 99.34it/s, vae_loss=0.0219]


2025-12-06 16:49.21 [info     ] PLAS_20251206164120: epoch=35 step=35000 epoch=35 metrics={'time_sample_batch': 0.001972857236862183, 'time_algorithm_update': 0.007641219377517701, 'vae_loss': 0.02186162112094462, 'time_step': 0.00986108660697937, 'td_error': 0.4490534917255697, 'value_scale': 0.04529373981066404, 'discounted_advantage': 0.0015429755203074053, 'initial_state': 0.06867102533578873, 'diff_eval': 989.0987509506314} step=35000
2025-12-06 16:49.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_35000.d3


Epoch 36/200: 100%|██████████| 1000/1000 [00:09<00:00, 105.24it/s, vae_loss=0.0214]


2025-12-06 16:49.35 [info     ] PLAS_20251206164120: epoch=36 step=36000 epoch=36 metrics={'time_sample_batch': 0.0018702335357666017, 'time_algorithm_update': 0.0072330799102783205, 'vae_loss': 0.02141640809364617, 'time_step': 0.009324845314025878, 'td_error': 0.4494800794962683, 'value_scale': 0.04574133508994064, 'discounted_advantage': -0.0005303944154839642, 'initial_state': 0.06928887963294983, 'diff_eval': 890.406901082046} step=36000
2025-12-06 16:49.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_36000.d3


Epoch 37/200: 100%|██████████| 1000/1000 [00:10<00:00, 92.08it/s, vae_loss=0.0209]


2025-12-06 16:49.49 [info     ] PLAS_20251206164120: epoch=37 step=37000 epoch=37 metrics={'time_sample_batch': 0.0020004606246948244, 'time_algorithm_update': 0.008435506820678711, 'vae_loss': 0.020902863569557666, 'time_step': 0.01067160439491272, 'td_error': 0.44918287491207753, 'value_scale': 0.045422199435535215, 'discounted_advantage': 0.0009000426678915503, 'initial_state': 0.06910333782434464, 'diff_eval': 961.9408362543992} step=37000
2025-12-06 16:49.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_37000.d3


Epoch 38/200: 100%|██████████| 1000/1000 [00:10<00:00, 97.82it/s, vae_loss=0.0206]


2025-12-06 16:50.04 [info     ] PLAS_20251206164120: epoch=38 step=38000 epoch=38 metrics={'time_sample_batch': 0.002042250871658325, 'time_algorithm_update': 0.007740161657333374, 'vae_loss': 0.02063381847180426, 'time_step': 0.010030234098434449, 'td_error': 0.44911260920061763, 'value_scale': 0.04535971696223917, 'discounted_advantage': 0.0012472752211626942, 'initial_state': 0.06889792531728745, 'diff_eval': 930.3686406615253} step=38000
2025-12-06 16:50.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_38000.d3


Epoch 39/200: 100%|██████████| 1000/1000 [00:11<00:00, 86.02it/s, vae_loss=0.0208]


2025-12-06 16:50.20 [info     ] PLAS_20251206164120: epoch=39 step=39000 epoch=39 metrics={'time_sample_batch': 0.0024275310039520263, 'time_algorithm_update': 0.0086644127368927, 'vae_loss': 0.020766332658939065, 'time_step': 0.011379822254180908, 'td_error': 0.4493760987742939, 'value_scale': 0.04557019483714934, 'discounted_advantage': 0.00012231060055473622, 'initial_state': 0.06877832114696503, 'diff_eval': 866.4087743256055} step=39000
2025-12-06 16:50.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_39000.d3


Epoch 40/200: 100%|██████████| 1000/1000 [00:09<00:00, 100.70it/s, vae_loss=0.0205]


2025-12-06 16:50.34 [info     ] PLAS_20251206164120: epoch=40 step=40000 epoch=40 metrics={'time_sample_batch': 0.001994075298309326, 'time_algorithm_update': 0.007515763282775879, 'vae_loss': 0.020496206702198833, 'time_step': 0.009734086275100707, 'td_error': 0.4493471380006794, 'value_scale': 0.045537485071666356, 'discounted_advantage': 0.00019551920428252496, 'initial_state': 0.06869704276323318, 'diff_eval': 869.99554655663} step=40000
2025-12-06 16:50.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_40000.d3


Epoch 41/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.41it/s, vae_loss=0.0206]


2025-12-06 16:50.48 [info     ] PLAS_20251206164120: epoch=41 step=41000 epoch=41 metrics={'time_sample_batch': 0.001945113182067871, 'time_algorithm_update': 0.007411552667617798, 'vae_loss': 0.020554635933600365, 'time_step': 0.009588740587234497, 'td_error': 0.44903978338099987, 'value_scale': 0.045194306905004325, 'discounted_advantage': 0.0014139935913534112, 'initial_state': 0.06806913018226624, 'diff_eval': 962.1746138723316} step=41000
2025-12-06 16:50.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_41000.d3


Epoch 42/200: 100%|██████████| 1000/1000 [00:10<00:00, 98.44it/s, vae_loss=0.0203]


2025-12-06 16:51.02 [info     ] PLAS_20251206164120: epoch=42 step=42000 epoch=42 metrics={'time_sample_batch': 0.0020415637493133546, 'time_algorithm_update': 0.007682588815689087, 'vae_loss': 0.020315001670271157, 'time_step': 0.00996147847175598, 'td_error': 0.44909792284292016, 'value_scale': 0.04526447650897633, 'discounted_advantage': 0.0013250668175128117, 'initial_state': 0.06858482211828232, 'diff_eval': 1015.2448885272827} step=42000
2025-12-06 16:51.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_42000.d3


Epoch 43/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.47it/s, vae_loss=0.0204]


2025-12-06 16:51.15 [info     ] PLAS_20251206164120: epoch=43 step=43000 epoch=43 metrics={'time_sample_batch': 0.001972149133682251, 'time_algorithm_update': 0.007460629940032959, 'vae_loss': 0.020427103751339017, 'time_step': 0.009657712936401367, 'td_error': 0.4494833525519541, 'value_scale': 0.045747015130302535, 'discounted_advantage': -0.00026755993326389835, 'initial_state': 0.06955607235431671, 'diff_eval': 913.0046652982118} step=43000
2025-12-06 16:51.15 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_43000.d3


Epoch 44/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.72it/s, vae_loss=0.0202]


2025-12-06 16:51.29 [info     ] PLAS_20251206164120: epoch=44 step=44000 epoch=44 metrics={'time_sample_batch': 0.00187212872505188, 'time_algorithm_update': 0.007361167669296264, 'vae_loss': 0.020254766991361976, 'time_step': 0.009471078872680665, 'td_error': 0.4496084085004007, 'value_scale': 0.04580710451403588, 'discounted_advantage': -0.0009307542374171708, 'initial_state': 0.0691319927573204, 'diff_eval': 879.4438399546459} step=44000
2025-12-06 16:51.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_44000.d3


Epoch 45/200: 100%|██████████| 1000/1000 [00:10<00:00, 97.00it/s, vae_loss=0.0196]


2025-12-06 16:51.43 [info     ] PLAS_20251206164120: epoch=45 step=45000 epoch=45 metrics={'time_sample_batch': 0.0022295920848846434, 'time_algorithm_update': 0.007638145208358765, 'vae_loss': 0.01964538846630603, 'time_step': 0.01011368727684021, 'td_error': 0.44934350991211447, 'value_scale': 0.04555353792407687, 'discounted_advantage': 0.0002935545935935528, 'initial_state': 0.06876492500305176, 'diff_eval': 842.433863881714} step=45000
2025-12-06 16:51.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_45000.d3


Epoch 46/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.51it/s, vae_loss=0.0195]


2025-12-06 16:51.56 [info     ] PLAS_20251206164120: epoch=46 step=46000 epoch=46 metrics={'time_sample_batch': 0.001900378942489624, 'time_algorithm_update': 0.007292574882507325, 'vae_loss': 0.019485452859662473, 'time_step': 0.009446709394454956, 'td_error': 0.4491140377097813, 'value_scale': 0.045296774132603475, 'discounted_advantage': 0.0014054112718629133, 'initial_state': 0.06834685057401657, 'diff_eval': 877.7160872086126} step=46000
2025-12-06 16:51.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_46000.d3


Epoch 47/200: 100%|██████████| 1000/1000 [00:09<00:00, 100.35it/s, vae_loss=0.0196]


2025-12-06 16:52.10 [info     ] PLAS_20251206164120: epoch=47 step=47000 epoch=47 metrics={'time_sample_batch': 0.001964603900909424, 'time_algorithm_update': 0.00755483341217041, 'vae_loss': 0.019624035004526377, 'time_step': 0.009767117977142335, 'td_error': 0.4495033999902303, 'value_scale': 0.04571460572805971, 'discounted_advantage': 0.00012520147702050984, 'initial_state': 0.0690760388970375, 'diff_eval': 852.2370670526929} step=47000
2025-12-06 16:52.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_47000.d3


Epoch 48/200: 100%|██████████| 1000/1000 [00:10<00:00, 97.09it/s, vae_loss=0.0191]


2025-12-06 16:52.24 [info     ] PLAS_20251206164120: epoch=48 step=48000 epoch=48 metrics={'time_sample_batch': 0.0020325686931610106, 'time_algorithm_update': 0.007781329154968261, 'vae_loss': 0.01912605179660022, 'time_step': 0.01007371997833252, 'td_error': 0.4494084977090688, 'value_scale': 0.0456100872265798, 'discounted_advantage': -0.0002583653397192398, 'initial_state': 0.06854692846536636, 'diff_eval': 787.8044628280476} step=48000
2025-12-06 16:52.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_48000.d3


Epoch 49/200: 100%|██████████| 1000/1000 [00:10<00:00, 98.00it/s, vae_loss=0.0195]


2025-12-06 16:52.38 [info     ] PLAS_20251206164120: epoch=49 step=49000 epoch=49 metrics={'time_sample_batch': 0.0019424319267272948, 'time_algorithm_update': 0.007829566478729249, 'vae_loss': 0.01948358705919236, 'time_step': 0.010015863180160522, 'td_error': 0.44918505326273106, 'value_scale': 0.04536645407128801, 'discounted_advantage': 0.0007598966848861718, 'initial_state': 0.06849914789199829, 'diff_eval': 874.3500164904412} step=49000
2025-12-06 16:52.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_49000.d3


Epoch 50/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.04it/s, vae_loss=0.0193]


2025-12-06 16:52.52 [info     ] PLAS_20251206164120: epoch=50 step=50000 epoch=50 metrics={'time_sample_batch': 0.0018742778301239014, 'time_algorithm_update': 0.007487005233764649, 'vae_loss': 0.019288032272830605, 'time_step': 0.009606122016906738, 'td_error': 0.4493631201003633, 'value_scale': 0.04553880811921084, 'discounted_advantage': 0.00020790557030173705, 'initial_state': 0.06859207898378372, 'diff_eval': 793.5493596246662} step=50000
2025-12-06 16:52.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_50000.d3


Epoch 51/200: 100%|██████████| 1000/1000 [00:09<00:00, 100.69it/s, vae_loss=0.0188]


2025-12-06 16:53.06 [info     ] PLAS_20251206164120: epoch=51 step=51000 epoch=51 metrics={'time_sample_batch': 0.0019711480140686033, 'time_algorithm_update': 0.007522648096084595, 'vae_loss': 0.018789363048039378, 'time_step': 0.009735568284988403, 'td_error': 0.4487764337542766, 'value_scale': 0.04492363501777318, 'discounted_advantage': 0.0028342591781530214, 'initial_state': 0.06788172572851181, 'diff_eval': 1059.3470131761292} step=51000
2025-12-06 16:53.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_51000.d3


Epoch 52/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.42it/s, vae_loss=0.0188]


2025-12-06 16:53.20 [info     ] PLAS_20251206164120: epoch=52 step=52000 epoch=52 metrics={'time_sample_batch': 0.001923159122467041, 'time_algorithm_update': 0.0074581606388092045, 'vae_loss': 0.01880401949863881, 'time_step': 0.009644867181777953, 'td_error': 0.4494458361969318, 'value_scale': 0.04566707732999028, 'discounted_advantage': -0.0003253273108905614, 'initial_state': 0.06936202943325043, 'diff_eval': 833.7157157337434} step=52000
2025-12-06 16:53.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_52000.d3


Epoch 53/200: 100%|██████████| 1000/1000 [00:09<00:00, 100.98it/s, vae_loss=0.0187]


2025-12-06 16:53.34 [info     ] PLAS_20251206164120: epoch=53 step=53000 epoch=53 metrics={'time_sample_batch': 0.001973944902420044, 'time_algorithm_update': 0.00750203824043274, 'vae_loss': 0.018674841749481856, 'time_step': 0.009711010456085205, 'td_error': 0.4492384871941729, 'value_scale': 0.0455009731146361, 'discounted_advantage': 0.0007420657848059987, 'initial_state': 0.0692509114742279, 'diff_eval': 800.3072364308807} step=53000
2025-12-06 16:53.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_53000.d3


Epoch 54/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.24it/s, vae_loss=0.0184]


2025-12-06 16:53.48 [info     ] PLAS_20251206164120: epoch=54 step=54000 epoch=54 metrics={'time_sample_batch': 0.0019058990478515625, 'time_algorithm_update': 0.007382569551467896, 'vae_loss': 0.018403585206251592, 'time_step': 0.009507702112197876, 'td_error': 0.44935518794535384, 'value_scale': 0.045633480479754976, 'discounted_advantage': 0.00037738869062684595, 'initial_state': 0.06957938522100449, 'diff_eval': 785.1412581486138} step=54000
2025-12-06 16:53.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_54000.d3


Epoch 55/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.14it/s, vae_loss=0.0183]


2025-12-06 16:54.01 [info     ] PLAS_20251206164120: epoch=55 step=55000 epoch=55 metrics={'time_sample_batch': 0.0019560685157775878, 'time_algorithm_update': 0.00743045711517334, 'vae_loss': 0.01828817790094763, 'time_step': 0.00961075210571289, 'td_error': 0.4492893158504278, 'value_scale': 0.04554137263965402, 'discounted_advantage': 0.0006677891803442733, 'initial_state': 0.06910139322280884, 'diff_eval': 770.2043372661751} step=55000
2025-12-06 16:54.01 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_55000.d3


Epoch 56/200: 100%|██████████| 1000/1000 [00:10<00:00, 98.93it/s, vae_loss=0.0182]


2025-12-06 16:54.15 [info     ] PLAS_20251206164120: epoch=56 step=56000 epoch=56 metrics={'time_sample_batch': 0.001998208284378052, 'time_algorithm_update': 0.007668915271759033, 'vae_loss': 0.018154445838183163, 'time_step': 0.00990846061706543, 'td_error': 0.44927076008187933, 'value_scale': 0.04546574218556197, 'discounted_advantage': 0.0006836814174323056, 'initial_state': 0.06859645992517471, 'diff_eval': 758.6725821674635} step=56000
2025-12-06 16:54.15 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_56000.d3


Epoch 57/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.58it/s, vae_loss=0.0179]


2025-12-06 16:54.29 [info     ] PLAS_20251206164120: epoch=57 step=57000 epoch=57 metrics={'time_sample_batch': 0.0019081320762634277, 'time_algorithm_update': 0.0074116942882537845, 'vae_loss': 0.01792821487551555, 'time_step': 0.009566147089004516, 'td_error': 0.4496487942615257, 'value_scale': 0.04591660357992987, 'discounted_advantage': -0.0010141248326698477, 'initial_state': 0.06958252191543579, 'diff_eval': 760.6423446996308} step=57000
2025-12-06 16:54.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_57000.d3


Epoch 58/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.21it/s, vae_loss=0.0179]


2025-12-06 16:54.43 [info     ] PLAS_20251206164120: epoch=58 step=58000 epoch=58 metrics={'time_sample_batch': 0.001880014419555664, 'time_algorithm_update': 0.007370604753494263, 'vae_loss': 0.01793429873138666, 'time_step': 0.009489384412765502, 'td_error': 0.44919649463223066, 'value_scale': 0.04538883385385097, 'discounted_advantage': 0.0008720188388344056, 'initial_state': 0.06849224865436554, 'diff_eval': 760.9804990047678} step=58000
2025-12-06 16:54.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_58000.d3


Epoch 59/200: 100%|██████████| 1000/1000 [00:10<00:00, 99.32it/s, vae_loss=0.0179] 


2025-12-06 16:54.57 [info     ] PLAS_20251206164120: epoch=59 step=59000 epoch=59 metrics={'time_sample_batch': 0.0020190508365631105, 'time_algorithm_update': 0.007622647285461426, 'vae_loss': 0.01792051870841533, 'time_step': 0.00987981128692627, 'td_error': 0.44927688542403565, 'value_scale': 0.04549899373487675, 'discounted_advantage': 0.0008111639179529474, 'initial_state': 0.06884641945362091, 'diff_eval': 747.5762544212765} step=59000
2025-12-06 16:54.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_59000.d3


Epoch 60/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.74it/s, vae_loss=0.0181]


2025-12-06 16:55.10 [info     ] PLAS_20251206164120: epoch=60 step=60000 epoch=60 metrics={'time_sample_batch': 0.001973247528076172, 'time_algorithm_update': 0.00741469931602478, 'vae_loss': 0.01806730448268354, 'time_step': 0.009624478101730347, 'td_error': 0.4493222162199075, 'value_scale': 0.04547804380023082, 'discounted_advantage': 0.0005422253066191772, 'initial_state': 0.06852995604276657, 'diff_eval': 738.3898956885396} step=60000
2025-12-06 16:55.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_60000.d3


Epoch 61/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.34it/s, vae_loss=0.018]


2025-12-06 16:55.24 [info     ] PLAS_20251206164120: epoch=61 step=61000 epoch=61 metrics={'time_sample_batch': 0.0019181323051452636, 'time_algorithm_update': 0.007452447652816772, 'vae_loss': 0.0179922690410167, 'time_step': 0.009599357843399048, 'td_error': 0.449341834687429, 'value_scale': 0.04551416853043092, 'discounted_advantage': 0.0004066685535229008, 'initial_state': 0.06869998574256897, 'diff_eval': 724.0761120057381} step=61000
2025-12-06 16:55.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_61000.d3


Epoch 62/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.35it/s, vae_loss=0.0173]


2025-12-06 16:55.37 [info     ] PLAS_20251206164120: epoch=62 step=62000 epoch=62 metrics={'time_sample_batch': 0.001914846897125244, 'time_algorithm_update': 0.007363788366317749, 'vae_loss': 0.01734157798951492, 'time_step': 0.009500771760940552, 'td_error': 0.44933090228656236, 'value_scale': 0.045577277449308604, 'discounted_advantage': -5.299885576819433e-05, 'initial_state': 0.06889800727367401, 'diff_eval': 697.9146644818287} step=62000
2025-12-06 16:55.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_62000.d3


Epoch 63/200: 100%|██████████| 1000/1000 [00:10<00:00, 99.79it/s, vae_loss=0.0175]


2025-12-06 16:55.51 [info     ] PLAS_20251206164120: epoch=63 step=63000 epoch=63 metrics={'time_sample_batch': 0.0020062878131866455, 'time_algorithm_update': 0.007567270517349243, 'vae_loss': 0.017462176749948412, 'time_step': 0.009821128606796264, 'td_error': 0.44957007321806913, 'value_scale': 0.045853313570334724, 'discounted_advantage': -0.0001650767275496147, 'initial_state': 0.06940418481826782, 'diff_eval': 749.9947250391722} step=63000
2025-12-06 16:55.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_63000.d3


Epoch 64/200: 100%|██████████| 1000/1000 [00:10<00:00, 99.70it/s, vae_loss=0.0171] 


2025-12-06 16:56.05 [info     ] PLAS_20251206164120: epoch=64 step=64000 epoch=64 metrics={'time_sample_batch': 0.002010617733001709, 'time_algorithm_update': 0.007582532167434692, 'vae_loss': 0.017049978049937634, 'time_step': 0.009823753595352173, 'td_error': 0.4491720288844554, 'value_scale': 0.04532500830020721, 'discounted_advantage': 0.0010932333743507497, 'initial_state': 0.06820894032716751, 'diff_eval': 719.8508230521815} step=64000
2025-12-06 16:56.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_64000.d3


Epoch 65/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.29it/s, vae_loss=0.0174]


2025-12-06 16:56.19 [info     ] PLAS_20251206164120: epoch=65 step=65000 epoch=65 metrics={'time_sample_batch': 0.0019265367984771728, 'time_algorithm_update': 0.007444668054580689, 'vae_loss': 0.017427400786895305, 'time_step': 0.009592245101928711, 'td_error': 0.4491692892841314, 'value_scale': 0.04538009233721231, 'discounted_advantage': 0.0013871319957483395, 'initial_state': 0.06854266673326492, 'diff_eval': 729.481868607163} step=65000
2025-12-06 16:56.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_65000.d3


Epoch 66/200: 100%|██████████| 1000/1000 [00:09<00:00, 104.12it/s, vae_loss=0.0174]


2025-12-06 16:56.32 [info     ] PLAS_20251206164120: epoch=66 step=66000 epoch=66 metrics={'time_sample_batch': 0.0018618836402893065, 'time_algorithm_update': 0.007327567100524903, 'vae_loss': 0.01739804726606235, 'time_step': 0.009421859025955201, 'td_error': 0.4492688618901583, 'value_scale': 0.04548729703568815, 'discounted_advantage': 0.00029155637938717505, 'initial_state': 0.06844249367713928, 'diff_eval': 695.701527627042} step=66000
2025-12-06 16:56.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_66000.d3


Epoch 67/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.52it/s, vae_loss=0.0171]


2025-12-06 16:56.46 [info     ] PLAS_20251206164120: epoch=67 step=67000 epoch=67 metrics={'time_sample_batch': 0.0019373881816864013, 'time_algorithm_update': 0.007482935190200806, 'vae_loss': 0.01705255581298843, 'time_step': 0.009660545587539673, 'td_error': 0.4491575170806159, 'value_scale': 0.04534721427362128, 'discounted_advantage': 0.0014113907740246285, 'initial_state': 0.06823854893445969, 'diff_eval': 715.9504780601528} step=67000
2025-12-06 16:56.46 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_67000.d3


Epoch 68/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.88it/s, vae_loss=0.0173]


2025-12-06 16:57.00 [info     ] PLAS_20251206164120: epoch=68 step=68000 epoch=68 metrics={'time_sample_batch': 0.0019521634578704833, 'time_algorithm_update': 0.007440191984176636, 'vae_loss': 0.017329819226637483, 'time_step': 0.009630140542984009, 'td_error': 0.44896649868208666, 'value_scale': 0.045116284789559674, 'discounted_advantage': 0.002415902839078076, 'initial_state': 0.06773276627063751, 'diff_eval': 734.8738813709356} step=68000
2025-12-06 16:57.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_68000.d3


Epoch 69/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.75it/s, vae_loss=0.017]


2025-12-06 16:57.13 [info     ] PLAS_20251206164120: epoch=69 step=69000 epoch=69 metrics={'time_sample_batch': 0.001883857011795044, 'time_algorithm_update': 0.0074271814823150635, 'vae_loss': 0.01701347051979974, 'time_step': 0.009545695304870606, 'td_error': 0.4494340809989655, 'value_scale': 0.045659009964819704, 'discounted_advantage': 0.0003300240136588282, 'initial_state': 0.06912665069103241, 'diff_eval': 675.5902817210962} step=69000
2025-12-06 16:57.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_69000.d3


Epoch 70/200: 100%|██████████| 1000/1000 [00:09<00:00, 100.04it/s, vae_loss=0.017]


2025-12-06 16:57.27 [info     ] PLAS_20251206164120: epoch=70 step=70000 epoch=70 metrics={'time_sample_batch': 0.0019855167865753176, 'time_algorithm_update': 0.007585937261581421, 'vae_loss': 0.016983979954849928, 'time_step': 0.009813414812088013, 'td_error': 0.4492867148620824, 'value_scale': 0.04556282079635752, 'discounted_advantage': 0.0007219936223403357, 'initial_state': 0.06950810551643372, 'diff_eval': 678.9707656180375} step=70000
2025-12-06 16:57.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_70000.d3


Epoch 71/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.61it/s, vae_loss=0.0166]


2025-12-06 16:57.41 [info     ] PLAS_20251206164120: epoch=71 step=71000 epoch=71 metrics={'time_sample_batch': 0.0018869278430938721, 'time_algorithm_update': 0.00736817479133606, 'vae_loss': 0.01660098291002214, 'time_step': 0.00946992254257202, 'td_error': 0.44939023887937257, 'value_scale': 0.04558902276462851, 'discounted_advantage': 0.00035458249245935746, 'initial_state': 0.06861436367034912, 'diff_eval': 647.9699634922142} step=71000
2025-12-06 16:57.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_71000.d3


Epoch 72/200: 100%|██████████| 1000/1000 [00:10<00:00, 95.48it/s, vae_loss=0.0162]


2025-12-06 16:57.55 [info     ] PLAS_20251206164120: epoch=72 step=72000 epoch=72 metrics={'time_sample_batch': 0.002047959804534912, 'time_algorithm_update': 0.007977383136749268, 'vae_loss': 0.016243643764872105, 'time_step': 0.0102572500705719, 'td_error': 0.4494156163416333, 'value_scale': 0.04570820987066886, 'discounted_advantage': -5.680436806929051e-05, 'initial_state': 0.06928947567939758, 'diff_eval': 647.7657730104548} step=72000
2025-12-06 16:57.55 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_72000.d3


Epoch 73/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.95it/s, vae_loss=0.0163]


2025-12-06 16:58.08 [info     ] PLAS_20251206164120: epoch=73 step=73000 epoch=73 metrics={'time_sample_batch': 0.0019219906330108642, 'time_algorithm_update': 0.00748681640625, 'vae_loss': 0.016321124715730547, 'time_step': 0.009632340431213379, 'td_error': 0.4494192851949571, 'value_scale': 0.045624285973827075, 'discounted_advantage': -0.0003089735345749257, 'initial_state': 0.06887920200824738, 'diff_eval': 657.955461241727} step=73000
2025-12-06 16:58.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_73000.d3


Epoch 74/200: 100%|██████████| 1000/1000 [00:10<00:00, 99.76it/s, vae_loss=0.0163]


2025-12-06 16:58.22 [info     ] PLAS_20251206164120: epoch=74 step=74000 epoch=74 metrics={'time_sample_batch': 0.002034499645233154, 'time_algorithm_update': 0.007530171155929566, 'vae_loss': 0.016277233785949648, 'time_step': 0.009818839311599731, 'td_error': 0.44912560065232765, 'value_scale': 0.045275878823968864, 'discounted_advantage': 0.0014571955126544988, 'initial_state': 0.06781031936407089, 'diff_eval': 687.7136146875773} step=74000
2025-12-06 16:58.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_74000.d3


Epoch 75/200: 100%|██████████| 1000/1000 [00:10<00:00, 98.58it/s, vae_loss=0.0162]


2025-12-06 16:58.37 [info     ] PLAS_20251206164120: epoch=75 step=75000 epoch=75 metrics={'time_sample_batch': 0.001996877193450928, 'time_algorithm_update': 0.007678340911865234, 'vae_loss': 0.016200465923640876, 'time_step': 0.009935750484466552, 'td_error': 0.44936759948550253, 'value_scale': 0.04562875601811973, 'discounted_advantage': 0.00014283027785214763, 'initial_state': 0.06912556290626526, 'diff_eval': 627.2943049968312} step=75000
2025-12-06 16:58.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_75000.d3


Epoch 76/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.03it/s, vae_loss=0.0161]


2025-12-06 16:58.50 [info     ] PLAS_20251206164120: epoch=76 step=76000 epoch=76 metrics={'time_sample_batch': 0.0019748668670654295, 'time_algorithm_update': 0.007512339115142822, 'vae_loss': 0.016110114099923523, 'time_step': 0.009720228672027587, 'td_error': 0.44923879986591864, 'value_scale': 0.0454316084626016, 'discounted_advantage': 0.0006281795556328829, 'initial_state': 0.06841972470283508, 'diff_eval': 635.891439196694} step=76000
2025-12-06 16:58.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_76000.d3


Epoch 77/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.09it/s, vae_loss=0.016]


2025-12-06 16:59.04 [info     ] PLAS_20251206164120: epoch=77 step=77000 epoch=77 metrics={'time_sample_batch': 0.0018950910568237304, 'time_algorithm_update': 0.007382522583007813, 'vae_loss': 0.01595981391426176, 'time_step': 0.009507555484771729, 'td_error': 0.44923115257768864, 'value_scale': 0.04544978699165225, 'discounted_advantage': 0.0008732206434629494, 'initial_state': 0.06847360730171204, 'diff_eval': 636.6711983005813} step=77000
2025-12-06 16:59.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_77000.d3


Epoch 78/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.93it/s, vae_loss=0.0157]


2025-12-06 16:59.17 [info     ] PLAS_20251206164120: epoch=78 step=78000 epoch=78 metrics={'time_sample_batch': 0.0018635718822479247, 'time_algorithm_update': 0.0073544738292694095, 'vae_loss': 0.015730795439332725, 'time_step': 0.009446923017501831, 'td_error': 0.4492427241380714, 'value_scale': 0.04544527705154582, 'discounted_advantage': 0.0009140072757301632, 'initial_state': 0.06874022632837296, 'diff_eval': 625.130507543177} step=78000
2025-12-06 16:59.17 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_78000.d3


Epoch 79/200: 100%|██████████| 1000/1000 [00:10<00:00, 98.71it/s, vae_loss=0.0157]


2025-12-06 16:59.32 [info     ] PLAS_20251206164120: epoch=79 step=79000 epoch=79 metrics={'time_sample_batch': 0.0019690093994140625, 'time_algorithm_update': 0.007686918020248413, 'vae_loss': 0.01571812297217548, 'time_step': 0.00992071557044983, 'td_error': 0.44933767292791754, 'value_scale': 0.0455919380419968, 'discounted_advantage': 0.000231838460380088, 'initial_state': 0.06922155618667603, 'diff_eval': 619.0564978941587} step=79000
2025-12-06 16:59.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_79000.d3


Epoch 80/200: 100%|██████████| 1000/1000 [00:10<00:00, 96.72it/s, vae_loss=0.0155]


2025-12-06 16:59.47 [info     ] PLAS_20251206164120: epoch=80 step=80000 epoch=80 metrics={'time_sample_batch': 0.001908073902130127, 'time_algorithm_update': 0.007984097003936768, 'vae_loss': 0.015545129724778235, 'time_step': 0.01014264178276062, 'td_error': 0.44942732164132243, 'value_scale': 0.04567643671553373, 'discounted_advantage': -1.6737792466513312e-05, 'initial_state': 0.0693904459476471, 'diff_eval': 611.8930361861514} step=80000
2025-12-06 16:59.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_80000.d3


Epoch 81/200: 100%|██████████| 1000/1000 [00:10<00:00, 91.24it/s, vae_loss=0.0157]


2025-12-06 17:00.01 [info     ] PLAS_20251206164120: epoch=81 step=81000 epoch=81 metrics={'time_sample_batch': 0.0030539724826812746, 'time_algorithm_update': 0.007486010074615478, 'vae_loss': 0.01564259920269251, 'time_step': 0.010775804281234742, 'td_error': 0.4494514774364521, 'value_scale': 0.04566860080392967, 'discounted_advantage': 2.18890552260112e-05, 'initial_state': 0.06891513615846634, 'diff_eval': 613.6003505465065} step=81000
2025-12-06 17:00.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_81000.d3


Epoch 82/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.22it/s, vae_loss=0.0157]


2025-12-06 17:00.15 [info     ] PLAS_20251206164120: epoch=82 step=82000 epoch=82 metrics={'time_sample_batch': 0.0019359467029571532, 'time_algorithm_update': 0.007532196760177612, 'vae_loss': 0.0156604598518461, 'time_step': 0.009694787502288818, 'td_error': 0.44917181075946655, 'value_scale': 0.045343161370895634, 'discounted_advantage': 0.0011193563957378424, 'initial_state': 0.06805065274238586, 'diff_eval': 624.1314645340882} step=82000
2025-12-06 17:00.15 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_82000.d3


Epoch 83/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.95it/s, vae_loss=0.0157]


2025-12-06 17:00.29 [info     ] PLAS_20251206164120: epoch=83 step=83000 epoch=83 metrics={'time_sample_batch': 0.0019196169376373292, 'time_algorithm_update': 0.007390565156936645, 'vae_loss': 0.0156853257343173, 'time_step': 0.009525896072387696, 'td_error': 0.44904262499384595, 'value_scale': 0.045141175590308766, 'discounted_advantage': 0.0017752736228425557, 'initial_state': 0.06743039190769196, 'diff_eval': 721.7435837464699} step=83000
2025-12-06 17:00.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_83000.d3


Epoch 84/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.54it/s, vae_loss=0.0153]


2025-12-06 17:00.43 [info     ] PLAS_20251206164120: epoch=84 step=84000 epoch=84 metrics={'time_sample_batch': 0.0019337112903594971, 'time_algorithm_update': 0.0074923255443573, 'vae_loss': 0.015283500515390187, 'time_step': 0.009664508819580078, 'td_error': 0.44910225751293215, 'value_scale': 0.04527735658347519, 'discounted_advantage': 0.00172824508329372, 'initial_state': 0.06798482686281204, 'diff_eval': 631.2006267392594} step=84000
2025-12-06 17:00.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_84000.d3


Epoch 85/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.14it/s, vae_loss=0.0152]


2025-12-06 17:00.56 [info     ] PLAS_20251206164120: epoch=85 step=85000 epoch=85 metrics={'time_sample_batch': 0.0019256534576416015, 'time_algorithm_update': 0.007374815940856934, 'vae_loss': 0.015240355401765555, 'time_step': 0.009515042781829835, 'td_error': 0.4492832474730979, 'value_scale': 0.04554428986163633, 'discounted_advantage': 0.0006762831360315429, 'initial_state': 0.06897040456533432, 'diff_eval': 564.7039632237239} step=85000
2025-12-06 17:00.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_85000.d3


Epoch 86/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.46it/s, vae_loss=0.0151]


2025-12-06 17:01.10 [info     ] PLAS_20251206164120: epoch=86 step=86000 epoch=86 metrics={'time_sample_batch': 0.0019029133319854736, 'time_algorithm_update': 0.0074425902366638185, 'vae_loss': 0.015074251521378755, 'time_step': 0.009583166122436523, 'td_error': 0.44931884088923335, 'value_scale': 0.04554296586208189, 'discounted_advantage': 0.0002634067069094248, 'initial_state': 0.0687226802110672, 'diff_eval': 592.5148056983103} step=86000
2025-12-06 17:01.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_86000.d3


Epoch 87/200: 100%|██████████| 1000/1000 [00:10<00:00, 98.58it/s, vae_loss=0.0152]


2025-12-06 17:01.24 [info     ] PLAS_20251206164120: epoch=87 step=87000 epoch=87 metrics={'time_sample_batch': 0.002448294162750244, 'time_algorithm_update': 0.007251856327056885, 'vae_loss': 0.015197262185625731, 'time_step': 0.009947188138961793, 'td_error': 0.4492134613000236, 'value_scale': 0.045399546201285425, 'discounted_advantage': 0.0009855394009358348, 'initial_state': 0.06815778464078903, 'diff_eval': 573.391946335515} step=87000
2025-12-06 17:01.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_87000.d3


Epoch 88/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.25it/s, vae_loss=0.015]


2025-12-06 17:01.38 [info     ] PLAS_20251206164120: epoch=88 step=88000 epoch=88 metrics={'time_sample_batch': 0.002230218172073364, 'time_algorithm_update': 0.007235537052154541, 'vae_loss': 0.01498413426661864, 'time_step': 0.009690578699111939, 'td_error': 0.44934709412853924, 'value_scale': 0.045578153267243605, 'discounted_advantage': 0.00035431216120544197, 'initial_state': 0.06902351975440979, 'diff_eval': 590.0341033536176} step=88000
2025-12-06 17:01.38 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_88000.d3


Epoch 89/200: 100%|██████████| 1000/1000 [00:10<00:00, 95.87it/s, vae_loss=0.015] 


2025-12-06 17:01.52 [info     ] PLAS_20251206164120: epoch=89 step=89000 epoch=89 metrics={'time_sample_batch': 0.0025914640426635744, 'time_algorithm_update': 0.007419792175292969, 'vae_loss': 0.014995048350654542, 'time_step': 0.010243439912796021, 'td_error': 0.4493582794804031, 'value_scale': 0.04563516623060292, 'discounted_advantage': 0.0005251229925660309, 'initial_state': 0.06891731917858124, 'diff_eval': 562.8468599711982} step=89000
2025-12-06 17:01.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_89000.d3


Epoch 90/200: 100%|██████████| 1000/1000 [00:10<00:00, 99.96it/s, vae_loss=0.015] 


2025-12-06 17:02.06 [info     ] PLAS_20251206164120: epoch=90 step=90000 epoch=90 metrics={'time_sample_batch': 0.001998239517211914, 'time_algorithm_update': 0.007532183408737183, 'vae_loss': 0.014982473220676184, 'time_step': 0.009785053491592408, 'td_error': 0.4492309894841934, 'value_scale': 0.04553405763167648, 'discounted_advantage': 0.0002684816348117057, 'initial_state': 0.06903792172670364, 'diff_eval': 586.8605416615399} step=90000
2025-12-06 17:02.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_90000.d3


Epoch 91/200: 100%|██████████| 1000/1000 [00:10<00:00, 99.81it/s, vae_loss=0.015] 


2025-12-06 17:02.20 [info     ] PLAS_20251206164120: epoch=91 step=91000 epoch=91 metrics={'time_sample_batch': 0.0019310104846954345, 'time_algorithm_update': 0.007630546092987061, 'vae_loss': 0.01498010824713856, 'time_step': 0.009818870067596436, 'td_error': 0.4493693572081193, 'value_scale': 0.045591730286022966, 'discounted_advantage': 0.0003767542418074098, 'initial_state': 0.06870073080062866, 'diff_eval': 553.0362930675572} step=91000
2025-12-06 17:02.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_91000.d3


Epoch 92/200: 100%|██████████| 1000/1000 [00:09<00:00, 104.61it/s, vae_loss=0.0147]


2025-12-06 17:02.33 [info     ] PLAS_20251206164120: epoch=92 step=92000 epoch=92 metrics={'time_sample_batch': 0.001924257516860962, 'time_algorithm_update': 0.007225049734115601, 'vae_loss': 0.014699070654809476, 'time_step': 0.00938148832321167, 'td_error': 0.4490553466490312, 'value_scale': 0.045226349363810786, 'discounted_advantage': 0.0015143432861700243, 'initial_state': 0.06794928759336472, 'diff_eval': 669.7393933254156} step=92000
2025-12-06 17:02.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_92000.d3


Epoch 93/200: 100%|██████████| 1000/1000 [00:09<00:00, 100.46it/s, vae_loss=0.0146]


2025-12-06 17:02.47 [info     ] PLAS_20251206164120: epoch=93 step=93000 epoch=93 metrics={'time_sample_batch': 0.001962976932525635, 'time_algorithm_update': 0.007511433601379395, 'vae_loss': 0.014596989118494094, 'time_step': 0.009735558748245238, 'td_error': 0.4490840228426673, 'value_scale': 0.04532796398120599, 'discounted_advantage': 0.0008164003359209137, 'initial_state': 0.06839194148778915, 'diff_eval': 615.9806253217323} step=93000
2025-12-06 17:02.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_93000.d3


Epoch 94/200: 100%|██████████| 1000/1000 [00:09<00:00, 105.88it/s, vae_loss=0.0144]


2025-12-06 17:03.00 [info     ] PLAS_20251206164120: epoch=94 step=94000 epoch=94 metrics={'time_sample_batch': 0.0018176047801971437, 'time_algorithm_update': 0.007198237180709839, 'vae_loss': 0.014374055305030197, 'time_step': 0.009256460189819337, 'td_error': 0.44929258590692633, 'value_scale': 0.04553568800942533, 'discounted_advantage': 0.001132832099157276, 'initial_state': 0.06862074881792068, 'diff_eval': 557.1790792722153} step=94000
2025-12-06 17:03.01 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_94000.d3


Epoch 95/200: 100%|██████████| 1000/1000 [00:09<00:00, 105.27it/s, vae_loss=0.0144]


2025-12-06 17:03.14 [info     ] PLAS_20251206164120: epoch=95 step=95000 epoch=95 metrics={'time_sample_batch': 0.0018351776599884032, 'time_algorithm_update': 0.0072750308513641355, 'vae_loss': 0.01438521753391251, 'time_step': 0.009327263116836548, 'td_error': 0.44923529394812045, 'value_scale': 0.04544957810997251, 'discounted_advantage': 0.0010083740050853688, 'initial_state': 0.06852852553129196, 'diff_eval': 548.6458013594632} step=95000
2025-12-06 17:03.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_95000.d3


Epoch 96/200: 100%|██████████| 1000/1000 [00:09<00:00, 106.59it/s, vae_loss=0.0141]


2025-12-06 17:03.27 [info     ] PLAS_20251206164120: epoch=96 step=96000 epoch=96 metrics={'time_sample_batch': 0.0018187706470489502, 'time_algorithm_update': 0.007181136846542358, 'vae_loss': 0.014129406311083585, 'time_step': 0.00921682357788086, 'td_error': 0.4491367886881497, 'value_scale': 0.045345030716415075, 'discounted_advantage': 0.001070927105135938, 'initial_state': 0.06816080957651138, 'diff_eval': 547.6343604986969} step=96000
2025-12-06 17:03.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_96000.d3


Epoch 97/200: 100%|██████████| 1000/1000 [00:09<00:00, 104.24it/s, vae_loss=0.0142]


2025-12-06 17:03.41 [info     ] PLAS_20251206164120: epoch=97 step=97000 epoch=97 metrics={'time_sample_batch': 0.001890427589416504, 'time_algorithm_update': 0.007247615814208985, 'vae_loss': 0.014254899891559035, 'time_step': 0.009396117210388183, 'td_error': 0.44906370937642415, 'value_scale': 0.04527656035341653, 'discounted_advantage': 0.0012959677217590988, 'initial_state': 0.06789261102676392, 'diff_eval': 606.2186154138062} step=97000
2025-12-06 17:03.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_97000.d3


Epoch 98/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.53it/s, vae_loss=0.0143]


2025-12-06 17:03.54 [info     ] PLAS_20251206164120: epoch=98 step=98000 epoch=98 metrics={'time_sample_batch': 0.0019404256343841552, 'time_algorithm_update': 0.0073945651054382324, 'vae_loss': 0.014263204318936915, 'time_step': 0.009577961683273316, 'td_error': 0.4492557570156289, 'value_scale': 0.04550743173013152, 'discounted_advantage': 0.0006823221299812084, 'initial_state': 0.06886983662843704, 'diff_eval': 532.9180409033447} step=98000
2025-12-06 17:03.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_98000.d3


Epoch 99/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.74it/s, vae_loss=0.0142]


2025-12-06 17:04.08 [info     ] PLAS_20251206164120: epoch=99 step=99000 epoch=99 metrics={'time_sample_batch': 0.0019143397808074952, 'time_algorithm_update': 0.007506140947341919, 'vae_loss': 0.014230089067015797, 'time_step': 0.009647804021835326, 'td_error': 0.4494234597799418, 'value_scale': 0.04573385544474122, 'discounted_advantage': 2.2173494083998144e-05, 'initial_state': 0.06948394328355789, 'diff_eval': 538.0858157458687} step=99000
2025-12-06 17:04.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_99000.d3


Epoch 100/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.08it/s, vae_loss=0.0139]


2025-12-06 17:04.22 [info     ] PLAS_20251206164120: epoch=100 step=100000 epoch=100 metrics={'time_sample_batch': 0.0019005277156829833, 'time_algorithm_update': 0.007482511281967163, 'vae_loss': 0.013872247712220996, 'time_step': 0.009616122007369995, 'td_error': 0.4490739054722314, 'value_scale': 0.045261557669865164, 'discounted_advantage': 0.001584812511121384, 'initial_state': 0.06802862137556076, 'diff_eval': 578.3715307013633} step=100000
2025-12-06 17:04.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_100000.d3


Epoch 101/200: 100%|██████████| 1000/1000 [00:10<00:00, 99.58it/s, vae_loss=0.0137]


2025-12-06 17:04.35 [info     ] PLAS_20251206164120: epoch=101 step=101000 epoch=101 metrics={'time_sample_batch': 0.002007076740264893, 'time_algorithm_update': 0.007599261045455933, 'vae_loss': 0.013760452757589518, 'time_step': 0.00985078740119934, 'td_error': 0.4492966216600678, 'value_scale': 0.0455447862788173, 'discounted_advantage': 0.0008347419830272846, 'initial_state': 0.06887020915746689, 'diff_eval': 543.488923444687} step=101000
2025-12-06 17:04.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_101000.d3


Epoch 102/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.62it/s, vae_loss=0.0142]


2025-12-06 17:04.49 [info     ] PLAS_20251206164120: epoch=102 step=102000 epoch=102 metrics={'time_sample_batch': 0.0019503154754638673, 'time_algorithm_update': 0.007489605188369751, 'vae_loss': 0.014141530421562493, 'time_step': 0.0096607403755188, 'td_error': 0.44947468468067003, 'value_scale': 0.04571221955979285, 'discounted_advantage': 0.0001020890553899643, 'initial_state': 0.06901185214519501, 'diff_eval': 504.58685079752644} step=102000
2025-12-06 17:04.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_102000.d3


Epoch 103/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.57it/s, vae_loss=0.014]


2025-12-06 17:05.02 [info     ] PLAS_20251206164120: epoch=103 step=103000 epoch=103 metrics={'time_sample_batch': 0.0018723819255828857, 'time_algorithm_update': 0.007388293266296387, 'vae_loss': 0.014031444494612515, 'time_step': 0.009483635425567627, 'td_error': 0.44920833433582624, 'value_scale': 0.045406145713473955, 'discounted_advantage': 0.0006514127199231011, 'initial_state': 0.06804970651865005, 'diff_eval': 531.6545679218975} step=103000
2025-12-06 17:05.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_103000.d3


Epoch 104/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.09it/s, vae_loss=0.0138]


2025-12-06 17:05.16 [info     ] PLAS_20251206164120: epoch=104 step=104000 epoch=104 metrics={'time_sample_batch': 0.0019102990627288818, 'time_algorithm_update': 0.00737265396118164, 'vae_loss': 0.013761434768792241, 'time_step': 0.009512579441070557, 'td_error': 0.44914795009870007, 'value_scale': 0.04542861024014595, 'discounted_advantage': 0.0011716243709674907, 'initial_state': 0.06874748319387436, 'diff_eval': 516.4898880717196} step=104000
2025-12-06 17:05.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_104000.d3


Epoch 105/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.11it/s, vae_loss=0.0138]


2025-12-06 17:05.30 [info     ] PLAS_20251206164120: epoch=105 step=105000 epoch=105 metrics={'time_sample_batch': 0.0019103150367736816, 'time_algorithm_update': 0.0075717768669128415, 'vae_loss': 0.013768034669104964, 'time_step': 0.009711493015289306, 'td_error': 0.44913020627354044, 'value_scale': 0.04532107594520578, 'discounted_advantage': 0.0013523765919525954, 'initial_state': 0.06822357326745987, 'diff_eval': 539.9268881085079} step=105000
2025-12-06 17:05.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_105000.d3


Epoch 106/200: 100%|██████████| 1000/1000 [00:09<00:00, 104.58it/s, vae_loss=0.0134]


2025-12-06 17:05.43 [info     ] PLAS_20251206164120: epoch=106 step=106000 epoch=106 metrics={'time_sample_batch': 0.0018499581813812256, 'time_algorithm_update': 0.007333137989044189, 'vae_loss': 0.013404094854369759, 'time_step': 0.009388136625289917, 'td_error': 0.4492161492196589, 'value_scale': 0.04543041445553765, 'discounted_advantage': 0.0004436886498097087, 'initial_state': 0.06814240664243698, 'diff_eval': 504.8186837625829} step=106000
2025-12-06 17:05.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_106000.d3


Epoch 107/200: 100%|██████████| 1000/1000 [00:10<00:00, 97.32it/s, vae_loss=0.0135]


2025-12-06 17:05.58 [info     ] PLAS_20251206164120: epoch=107 step=107000 epoch=107 metrics={'time_sample_batch': 0.0019854829311370848, 'time_algorithm_update': 0.007860803127288818, 'vae_loss': 0.013459754687733949, 'time_step': 0.010082775592803956, 'td_error': 0.4492872272513643, 'value_scale': 0.045516084214517874, 'discounted_advantage': 0.00027479761883462294, 'initial_state': 0.06878047436475754, 'diff_eval': 511.58802043258277} step=107000
2025-12-06 17:05.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_107000.d3


Epoch 108/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.11it/s, vae_loss=0.0136]


2025-12-06 17:06.11 [info     ] PLAS_20251206164120: epoch=108 step=108000 epoch=108 metrics={'time_sample_batch': 0.0019452960491180419, 'time_algorithm_update': 0.007536077022552491, 'vae_loss': 0.013639339818619192, 'time_step': 0.009711671829223632, 'td_error': 0.4489499622639378, 'value_scale': 0.045109990462723244, 'discounted_advantage': 0.001863314399573153, 'initial_state': 0.06748610734939575, 'diff_eval': 633.4106062397154} step=108000
2025-12-06 17:06.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_108000.d3


Epoch 109/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.65it/s, vae_loss=0.0133]


2025-12-06 17:06.25 [info     ] PLAS_20251206164120: epoch=109 step=109000 epoch=109 metrics={'time_sample_batch': 0.0018880934715270997, 'time_algorithm_update': 0.007470353603363037, 'vae_loss': 0.013315132525749505, 'time_step': 0.009570197820663453, 'td_error': 0.44917691556934825, 'value_scale': 0.045437511762764934, 'discounted_advantage': 0.0008095971431554952, 'initial_state': 0.06868074089288712, 'diff_eval': 520.1122362274177} step=109000
2025-12-06 17:06.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_109000.d3


Epoch 110/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.72it/s, vae_loss=0.0136]


2025-12-06 17:06.39 [info     ] PLAS_20251206164120: epoch=110 step=110000 epoch=110 metrics={'time_sample_batch': 0.0019224491119384766, 'time_algorithm_update': 0.0073870370388031, 'vae_loss': 0.013588086431380361, 'time_step': 0.009548460960388184, 'td_error': 0.44911505961575077, 'value_scale': 0.04532467781106277, 'discounted_advantage': 0.0010024677542199746, 'initial_state': 0.06823298335075378, 'diff_eval': 547.6037927504849} step=110000
2025-12-06 17:06.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_110000.d3


Epoch 111/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.72it/s, vae_loss=0.0132]


2025-12-06 17:06.52 [info     ] PLAS_20251206164120: epoch=111 step=111000 epoch=111 metrics={'time_sample_batch': 0.001905491590499878, 'time_algorithm_update': 0.0074269711971282956, 'vae_loss': 0.013206977016758173, 'time_step': 0.009551225662231445, 'td_error': 0.4491789784038939, 'value_scale': 0.04532704609294, 'discounted_advantage': 0.0009437686413037839, 'initial_state': 0.06808371841907501, 'diff_eval': 555.6715919492436} step=111000
2025-12-06 17:06.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_111000.d3


Epoch 112/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.87it/s, vae_loss=0.0134]


2025-12-06 17:07.06 [info     ] PLAS_20251206164120: epoch=112 step=112000 epoch=112 metrics={'time_sample_batch': 0.0018858799934387206, 'time_algorithm_update': 0.007491719961166382, 'vae_loss': 0.01340530918026343, 'time_step': 0.009618387699127198, 'td_error': 0.44936564462524425, 'value_scale': 0.04566545581854162, 'discounted_advantage': 0.0003245775735054397, 'initial_state': 0.06940915435552597, 'diff_eval': 490.8242208418164} step=112000
2025-12-06 17:07.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_112000.d3


Epoch 113/200: 100%|██████████| 1000/1000 [00:10<00:00, 98.59it/s, vae_loss=0.013] 


2025-12-06 17:07.20 [info     ] PLAS_20251206164120: epoch=113 step=113000 epoch=113 metrics={'time_sample_batch': 0.0020278542041778564, 'time_algorithm_update': 0.007672148704528809, 'vae_loss': 0.01299584781564772, 'time_step': 0.00993450403213501, 'td_error': 0.4492612118260683, 'value_scale': 0.04551037730163546, 'discounted_advantage': 0.00031067685782590256, 'initial_state': 0.06891307234764099, 'diff_eval': 523.2455331006022} step=113000
2025-12-06 17:07.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_113000.d3


Epoch 114/200: 100%|██████████| 1000/1000 [00:09<00:00, 100.97it/s, vae_loss=0.013]


2025-12-06 17:07.34 [info     ] PLAS_20251206164120: epoch=114 step=114000 epoch=114 metrics={'time_sample_batch': 0.001983659029006958, 'time_algorithm_update': 0.007495454549789429, 'vae_loss': 0.013039337859489023, 'time_step': 0.009708993434906006, 'td_error': 0.44933402737020495, 'value_scale': 0.04560652455823982, 'discounted_advantage': -3.855154552343619e-05, 'initial_state': 0.06883669644594193, 'diff_eval': 489.02647093858076} step=114000
2025-12-06 17:07.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_114000.d3


Epoch 115/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.34it/s, vae_loss=0.013]


2025-12-06 17:07.47 [info     ] PLAS_20251206164120: epoch=115 step=115000 epoch=115 metrics={'time_sample_batch': 0.0019484157562255858, 'time_algorithm_update': 0.007509123086929321, 'vae_loss': 0.013031518809031695, 'time_step': 0.009683176517486573, 'td_error': 0.44900358643652954, 'value_scale': 0.04517258885562457, 'discounted_advantage': 0.0018788669327989822, 'initial_state': 0.06794694066047668, 'diff_eval': 575.1420939268435} step=115000
2025-12-06 17:07.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_115000.d3


Epoch 116/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.33it/s, vae_loss=0.013]


2025-12-06 17:08.01 [info     ] PLAS_20251206164120: epoch=116 step=116000 epoch=116 metrics={'time_sample_batch': 0.001909620523452759, 'time_algorithm_update': 0.007419159412384033, 'vae_loss': 0.012957092942204327, 'time_step': 0.009576858282089233, 'td_error': 0.4491443566560955, 'value_scale': 0.045370955764154425, 'discounted_advantage': 0.0006062786177101776, 'initial_state': 0.06812988221645355, 'diff_eval': 573.7694831404848} step=116000
2025-12-06 17:08.01 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_116000.d3


Epoch 117/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.14it/s, vae_loss=0.013]


2025-12-06 17:08.15 [info     ] PLAS_20251206164120: epoch=117 step=117000 epoch=117 metrics={'time_sample_batch': 0.0018858361244201661, 'time_algorithm_update': 0.007446675777435303, 'vae_loss': 0.012992873145733028, 'time_step': 0.009584521532058715, 'td_error': 0.44939541591363646, 'value_scale': 0.04568945440718996, 'discounted_advantage': 6.561249762529562e-05, 'initial_state': 0.06902060657739639, 'diff_eval': 456.8427113649474} step=117000
2025-12-06 17:08.15 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_117000.d3


Epoch 118/200: 100%|██████████| 1000/1000 [00:09<00:00, 106.42it/s, vae_loss=0.0127]


2025-12-06 17:08.28 [info     ] PLAS_20251206164120: epoch=118 step=118000 epoch=118 metrics={'time_sample_batch': 0.0018117330074310302, 'time_algorithm_update': 0.00716043758392334, 'vae_loss': 0.012684351742267608, 'time_step': 0.0092194504737854, 'td_error': 0.4489546143757954, 'value_scale': 0.0452029080050533, 'discounted_advantage': 0.001802797133908612, 'initial_state': 0.06840036064386368, 'diff_eval': 595.361933682022} step=118000
2025-12-06 17:08.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_118000.d3


Epoch 119/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.40it/s, vae_loss=0.0128]


2025-12-06 17:08.41 [info     ] PLAS_20251206164120: epoch=119 step=119000 epoch=119 metrics={'time_sample_batch': 0.0018790385723114014, 'time_algorithm_update': 0.0073774118423461914, 'vae_loss': 0.012807766039390116, 'time_step': 0.00947690987586975, 'td_error': 0.4493121140379247, 'value_scale': 0.04560046216156037, 'discounted_advantage': 0.00024755154149940786, 'initial_state': 0.06852968037128448, 'diff_eval': 435.1479587822947} step=119000
2025-12-06 17:08.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_119000.d3


Epoch 120/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.49it/s, vae_loss=0.0125]


2025-12-06 17:08.55 [info     ] PLAS_20251206164120: epoch=120 step=120000 epoch=120 metrics={'time_sample_batch': 0.0019568586349487304, 'time_algorithm_update': 0.0074456634521484375, 'vae_loss': 0.012542493358254433, 'time_step': 0.009648592233657837, 'td_error': 0.4491960948178432, 'value_scale': 0.045469318384621586, 'discounted_advantage': 0.0006801334230808025, 'initial_state': 0.0686505138874054, 'diff_eval': 494.8835736711812} step=120000
2025-12-06 17:08.55 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_120000.d3


Epoch 121/200: 100%|██████████| 1000/1000 [00:10<00:00, 99.24it/s, vae_loss=0.0127]


2025-12-06 17:09.09 [info     ] PLAS_20251206164120: epoch=121 step=121000 epoch=121 metrics={'time_sample_batch': 0.002009260416030884, 'time_algorithm_update': 0.0076192872524261475, 'vae_loss': 0.012709789122920484, 'time_step': 0.009878052711486816, 'td_error': 0.449061723382722, 'value_scale': 0.04527421212073631, 'discounted_advantage': 0.001722645267902275, 'initial_state': 0.06812423467636108, 'diff_eval': 487.2120183808606} step=121000
2025-12-06 17:09.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_121000.d3


Epoch 122/200: 100%|██████████| 1000/1000 [00:09<00:00, 100.09it/s, vae_loss=0.0124]


2025-12-06 17:09.23 [info     ] PLAS_20251206164120: epoch=122 step=122000 epoch=122 metrics={'time_sample_batch': 0.001996714353561401, 'time_algorithm_update': 0.007579070091247558, 'vae_loss': 0.012444615934509785, 'time_step': 0.009800867319107056, 'td_error': 0.4492634527170086, 'value_scale': 0.045465316687853446, 'discounted_advantage': 0.000940646143839819, 'initial_state': 0.06813198328018188, 'diff_eval': 447.89519801102904} step=122000
2025-12-06 17:09.23 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_122000.d3


Epoch 123/200: 100%|██████████| 1000/1000 [00:10<00:00, 94.03it/s, vae_loss=0.0123]


2025-12-06 17:09.37 [info     ] PLAS_20251206164120: epoch=123 step=123000 epoch=123 metrics={'time_sample_batch': 0.0019786181449890135, 'time_algorithm_update': 0.008234261751174927, 'vae_loss': 0.012339938391931355, 'time_step': 0.01044656777381897, 'td_error': 0.44912947825168686, 'value_scale': 0.04540327631681845, 'discounted_advantage': 0.0009720635977216012, 'initial_state': 0.06892281770706177, 'diff_eval': 525.8428691022915} step=123000
2025-12-06 17:09.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_123000.d3


Epoch 124/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.24it/s, vae_loss=0.0122]


2025-12-06 17:09.52 [info     ] PLAS_20251206164120: epoch=124 step=124000 epoch=124 metrics={'time_sample_batch': 0.0018989586830139161, 'time_algorithm_update': 0.007556392669677735, 'vae_loss': 0.01224109976645559, 'time_step': 0.009692933320999145, 'td_error': 0.44929278203251943, 'value_scale': 0.04554917804283236, 'discounted_advantage': 0.0004753430026334659, 'initial_state': 0.06876763701438904, 'diff_eval': 458.07217905818857} step=124000
2025-12-06 17:09.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_124000.d3


Epoch 125/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.95it/s, vae_loss=0.0121]


2025-12-06 17:10.05 [info     ] PLAS_20251206164120: epoch=125 step=125000 epoch=125 metrics={'time_sample_batch': 0.0018870699405670165, 'time_algorithm_update': 0.007396457195281983, 'vae_loss': 0.012124090262688696, 'time_step': 0.009521929502487182, 'td_error': 0.44935059919341247, 'value_scale': 0.045651096018844685, 'discounted_advantage': 0.00022092667267593604, 'initial_state': 0.06921448558568954, 'diff_eval': 468.8435759803953} step=125000
2025-12-06 17:10.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_125000.d3


Epoch 126/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.71it/s, vae_loss=0.0125]


2025-12-06 17:10.19 [info     ] PLAS_20251206164120: epoch=126 step=126000 epoch=126 metrics={'time_sample_batch': 0.0018995964527130126, 'time_algorithm_update': 0.007409884691238404, 'vae_loss': 0.012476119942963123, 'time_step': 0.00955333113670349, 'td_error': 0.4491769701869454, 'value_scale': 0.04544147123463221, 'discounted_advantage': 0.0017147826221671296, 'initial_state': 0.06840013712644577, 'diff_eval': 470.97182319351293} step=126000
2025-12-06 17:10.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_126000.d3


Epoch 127/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.88it/s, vae_loss=0.0123]


2025-12-06 17:10.32 [info     ] PLAS_20251206164120: epoch=127 step=127000 epoch=127 metrics={'time_sample_batch': 0.0019126458168029785, 'time_algorithm_update': 0.00749685788154602, 'vae_loss': 0.012346975887194276, 'time_step': 0.009625927448272705, 'td_error': 0.4493681602153375, 'value_scale': 0.04564410666228652, 'discounted_advantage': -1.5727671926973993e-06, 'initial_state': 0.06910119205713272, 'diff_eval': 443.6894024568522} step=127000
2025-12-06 17:10.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_127000.d3


Epoch 128/200: 100%|██████████| 1000/1000 [00:09<00:00, 104.20it/s, vae_loss=0.0122]


2025-12-06 17:10.46 [info     ] PLAS_20251206164120: epoch=128 step=128000 epoch=128 metrics={'time_sample_batch': 0.0018810274600982665, 'time_algorithm_update': 0.007291540622711181, 'vae_loss': 0.012195453597698361, 'time_step': 0.009413151264190674, 'td_error': 0.44914396639337545, 'value_scale': 0.045389226357525174, 'discounted_advantage': 0.0015546999963923754, 'initial_state': 0.06887161731719971, 'diff_eval': 473.93374230334484} step=128000
2025-12-06 17:10.46 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_128000.d3


Epoch 129/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.76it/s, vae_loss=0.0121]


2025-12-06 17:11.00 [info     ] PLAS_20251206164120: epoch=129 step=129000 epoch=129 metrics={'time_sample_batch': 0.0018541035652160644, 'time_algorithm_update': 0.007361652135848999, 'vae_loss': 0.012077684923540801, 'time_step': 0.009449561357498169, 'td_error': 0.44922083305458665, 'value_scale': 0.04548567112641407, 'discounted_advantage': 0.0005289239348647681, 'initial_state': 0.06853251159191132, 'diff_eval': 430.15904983938253} step=129000
2025-12-06 17:11.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_129000.d3


Epoch 130/200: 100%|██████████| 1000/1000 [00:10<00:00, 98.75it/s, vae_loss=0.0122]


2025-12-06 17:11.14 [info     ] PLAS_20251206164120: epoch=130 step=130000 epoch=130 metrics={'time_sample_batch': 0.001962460994720459, 'time_algorithm_update': 0.007747718572616577, 'vae_loss': 0.012169087308924645, 'time_step': 0.009938344478607178, 'td_error': 0.4491241898008007, 'value_scale': 0.04542681960933916, 'discounted_advantage': 0.0017401545808912939, 'initial_state': 0.0688030868768692, 'diff_eval': 441.72646781777166} step=130000
2025-12-06 17:11.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_130000.d3


Epoch 131/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.38it/s, vae_loss=0.0119]


2025-12-06 17:11.27 [info     ] PLAS_20251206164120: epoch=131 step=131000 epoch=131 metrics={'time_sample_batch': 0.0019035284519195556, 'time_algorithm_update': 0.007338282585144043, 'vae_loss': 0.011943528181873261, 'time_step': 0.009479321479797364, 'td_error': 0.449328045711167, 'value_scale': 0.04553319720242232, 'discounted_advantage': 9.438252991499129e-05, 'initial_state': 0.06814274191856384, 'diff_eval': 437.16945201228765} step=131000
2025-12-06 17:11.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_131000.d3


Epoch 132/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.51it/s, vae_loss=0.0118]


2025-12-06 17:11.41 [info     ] PLAS_20251206164120: epoch=132 step=132000 epoch=132 metrics={'time_sample_batch': 0.0018799092769622803, 'time_algorithm_update': 0.007456621408462524, 'vae_loss': 0.011779850346501917, 'time_step': 0.009570814371109009, 'td_error': 0.44903442738466365, 'value_scale': 0.045181512046911364, 'discounted_advantage': 0.0019025613238661879, 'initial_state': 0.06776893883943558, 'diff_eval': 540.6336094951769} step=132000
2025-12-06 17:11.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_132000.d3


Epoch 133/200: 100%|██████████| 1000/1000 [00:10<00:00, 96.68it/s, vae_loss=0.0118]


2025-12-06 17:11.56 [info     ] PLAS_20251206164120: epoch=133 step=133000 epoch=133 metrics={'time_sample_batch': 0.0018645329475402831, 'time_algorithm_update': 0.008080142736434936, 'vae_loss': 0.01176376489503309, 'time_step': 0.01016774582862854, 'td_error': 0.4490427083501302, 'value_scale': 0.04530385839663077, 'discounted_advantage': 0.0013064196892429022, 'initial_state': 0.06839045137166977, 'diff_eval': 513.8615799543063} step=133000
2025-12-06 17:11.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_133000.d3


Epoch 134/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.70it/s, vae_loss=0.0118]


2025-12-06 17:12.10 [info     ] PLAS_20251206164120: epoch=134 step=134000 epoch=134 metrics={'time_sample_batch': 0.0019305548667907714, 'time_algorithm_update': 0.007402020454406738, 'vae_loss': 0.011840893054381013, 'time_step': 0.009558348655700684, 'td_error': 0.44886774220139813, 'value_scale': 0.0450880689369257, 'discounted_advantage': 0.0021570747580346703, 'initial_state': 0.06783431023359299, 'diff_eval': 558.286355691786} step=134000
2025-12-06 17:12.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_134000.d3


Epoch 135/200: 100%|██████████| 1000/1000 [00:10<00:00, 98.27it/s, vae_loss=0.0118]


2025-12-06 17:12.24 [info     ] PLAS_20251206164120: epoch=135 step=135000 epoch=135 metrics={'time_sample_batch': 0.001998603105545044, 'time_algorithm_update': 0.007749245166778564, 'vae_loss': 0.011789984555914998, 'time_step': 0.009973732471466065, 'td_error': 0.4493123107422844, 'value_scale': 0.045582942243459365, 'discounted_advantage': 1.3351856422762892e-05, 'initial_state': 0.06876735389232635, 'diff_eval': 416.3538040777658} step=135000
2025-12-06 17:12.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_135000.d3


Epoch 136/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.92it/s, vae_loss=0.0116]


2025-12-06 17:12.37 [info     ] PLAS_20251206164120: epoch=136 step=136000 epoch=136 metrics={'time_sample_batch': 0.0019452641010284425, 'time_algorithm_update': 0.0074555518627166745, 'vae_loss': 0.011549822232220322, 'time_step': 0.009635332584381104, 'td_error': 0.44901747571675554, 'value_scale': 0.0452442043816104, 'discounted_advantage': 0.0014199390901743357, 'initial_state': 0.06818358600139618, 'diff_eval': 557.5754168700727} step=136000
2025-12-06 17:12.38 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_136000.d3


Epoch 137/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.83it/s, vae_loss=0.0115]


2025-12-06 17:12.51 [info     ] PLAS_20251206164120: epoch=137 step=137000 epoch=137 metrics={'time_sample_batch': 0.001960517883300781, 'time_algorithm_update': 0.007421160459518432, 'vae_loss': 0.011478403553599493, 'time_step': 0.009633052825927734, 'td_error': 0.4491096219151591, 'value_scale': 0.04537317176542283, 'discounted_advantage': 0.0011793259419274143, 'initial_state': 0.0686345100402832, 'diff_eval': 476.798393467286} step=137000
2025-12-06 17:12.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_137000.d3


Epoch 138/200: 100%|██████████| 1000/1000 [00:10<00:00, 96.21it/s, vae_loss=0.0116]


2025-12-06 17:13.05 [info     ] PLAS_20251206164120: epoch=138 step=138000 epoch=138 metrics={'time_sample_batch': 0.0020862481594085694, 'time_algorithm_update': 0.007866763114929199, 'vae_loss': 0.011542492953129114, 'time_step': 0.01019176435470581, 'td_error': 0.4493027469143569, 'value_scale': 0.04566421416843402, 'discounted_advantage': 0.0003566457293195275, 'initial_state': 0.06911793351173401, 'diff_eval': 412.7293790735759} step=138000
2025-12-06 17:13.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_138000.d3


Epoch 139/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.22it/s, vae_loss=0.0114]


2025-12-06 17:13.19 [info     ] PLAS_20251206164120: epoch=139 step=139000 epoch=139 metrics={'time_sample_batch': 0.0019331693649291993, 'time_algorithm_update': 0.0074400570392608644, 'vae_loss': 0.011361521682236343, 'time_step': 0.009596503973007202, 'td_error': 0.44908420575783625, 'value_scale': 0.045276229304151434, 'discounted_advantage': 0.0012149455937376888, 'initial_state': 0.06791894137859344, 'diff_eval': 496.2609828409574} step=139000
2025-12-06 17:13.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_139000.d3


Epoch 140/200: 100%|██████████| 1000/1000 [00:10<00:00, 98.97it/s, vae_loss=0.0112]


2025-12-06 17:13.33 [info     ] PLAS_20251206164120: epoch=140 step=140000 epoch=140 metrics={'time_sample_batch': 0.001996927738189697, 'time_algorithm_update': 0.007684115648269653, 'vae_loss': 0.011240053268149496, 'time_step': 0.009929340362548828, 'td_error': 0.4490186461732256, 'value_scale': 0.04522370811464376, 'discounted_advantage': 0.0018404965403048686, 'initial_state': 0.06773681938648224, 'diff_eval': 464.8915968032934} step=140000
2025-12-06 17:13.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_140000.d3


Epoch 141/200: 100%|██████████| 1000/1000 [00:10<00:00, 99.93it/s, vae_loss=0.0113]


2025-12-06 17:13.47 [info     ] PLAS_20251206164120: epoch=141 step=141000 epoch=141 metrics={'time_sample_batch': 0.001960307836532593, 'time_algorithm_update': 0.007602565050125122, 'vae_loss': 0.011357163953594863, 'time_step': 0.009803601503372193, 'td_error': 0.44933518255129307, 'value_scale': 0.04563336303006399, 'discounted_advantage': 0.0005303237217892704, 'initial_state': 0.06903357058763504, 'diff_eval': 429.91379864141607} step=141000
2025-12-06 17:13.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_141000.d3


Epoch 142/200: 100%|██████████| 1000/1000 [00:09<00:00, 104.08it/s, vae_loss=0.0114]


2025-12-06 17:14.00 [info     ] PLAS_20251206164120: epoch=142 step=142000 epoch=142 metrics={'time_sample_batch': 0.0018365478515625, 'time_algorithm_update': 0.007366806983947754, 'vae_loss': 0.011441026243846863, 'time_step': 0.009424736738204957, 'td_error': 0.44912770106072286, 'value_scale': 0.04548389180770264, 'discounted_advantage': 0.001046465157096102, 'initial_state': 0.0691874623298645, 'diff_eval': 407.8344662802757} step=142000
2025-12-06 17:14.01 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_142000.d3


Epoch 143/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.86it/s, vae_loss=0.0113]


2025-12-06 17:14.14 [info     ] PLAS_20251206164120: epoch=143 step=143000 epoch=143 metrics={'time_sample_batch': 0.0018870284557342529, 'time_algorithm_update': 0.007423081636428833, 'vae_loss': 0.011261160666123033, 'time_step': 0.009546351671218872, 'td_error': 0.44942836866915015, 'value_scale': 0.04571326452753158, 'discounted_advantage': -0.0001045938873081918, 'initial_state': 0.06917139887809753, 'diff_eval': 387.92999897609116} step=143000
2025-12-06 17:14.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_143000.d3


Epoch 144/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.04it/s, vae_loss=0.0112]


2025-12-06 17:14.28 [info     ] PLAS_20251206164120: epoch=144 step=144000 epoch=144 metrics={'time_sample_batch': 0.0019257814884185791, 'time_algorithm_update': 0.007461562156677246, 'vae_loss': 0.011234829593915491, 'time_step': 0.009619704484939574, 'td_error': 0.4491708567381701, 'value_scale': 0.045481424160030016, 'discounted_advantage': 0.0010017237576243256, 'initial_state': 0.0688113123178482, 'diff_eval': 412.32035149817347} step=144000
2025-12-06 17:14.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_144000.d3


Epoch 145/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.56it/s, vae_loss=0.0113]


2025-12-06 17:14.41 [info     ] PLAS_20251206164120: epoch=145 step=145000 epoch=145 metrics={'time_sample_batch': 0.0018888475894927978, 'time_algorithm_update': 0.007535621166229248, 'vae_loss': 0.011253195641562342, 'time_step': 0.009658117294311524, 'td_error': 0.44944045587821185, 'value_scale': 0.04575600638971582, 'discounted_advantage': -0.0002193960981783622, 'initial_state': 0.06903277337551117, 'diff_eval': 399.40847244556824} step=145000
2025-12-06 17:14.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_145000.d3


Epoch 146/200: 100%|██████████| 1000/1000 [00:10<00:00, 97.78it/s, vae_loss=0.0111]


2025-12-06 17:14.56 [info     ] PLAS_20251206164120: epoch=146 step=146000 epoch=146 metrics={'time_sample_batch': 0.002052023649215698, 'time_algorithm_update': 0.007756236314773559, 'vae_loss': 0.011076343534514308, 'time_step': 0.010037901878356934, 'td_error': 0.44919156575478997, 'value_scale': 0.04548568400255368, 'discounted_advantage': 0.0014795553948630624, 'initial_state': 0.0689513310790062, 'diff_eval': 399.32156736547967} step=146000
2025-12-06 17:14.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_146000.d3


Epoch 147/200: 100%|██████████| 1000/1000 [00:10<00:00, 98.80it/s, vae_loss=0.011] 


2025-12-06 17:15.10 [info     ] PLAS_20251206164120: epoch=147 step=147000 epoch=147 metrics={'time_sample_batch': 0.0020059142112731935, 'time_algorithm_update': 0.007647762060165405, 'vae_loss': 0.011063648879993707, 'time_step': 0.00991205358505249, 'td_error': 0.449381286387264, 'value_scale': 0.045749785474986336, 'discounted_advantage': 0.0002343274693122628, 'initial_state': 0.06971099972724915, 'diff_eval': 406.22462576423754} step=147000
2025-12-06 17:15.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_147000.d3


Epoch 148/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.67it/s, vae_loss=0.0109]


2025-12-06 17:15.23 [info     ] PLAS_20251206164120: epoch=148 step=148000 epoch=148 metrics={'time_sample_batch': 0.0018844854831695556, 'time_algorithm_update': 0.007460983514785767, 'vae_loss': 0.010941573018208146, 'time_step': 0.009564654111862183, 'td_error': 0.44914293969427693, 'value_scale': 0.04541919563997659, 'discounted_advantage': 0.001364653959644637, 'initial_state': 0.06879652291536331, 'diff_eval': 409.8247593701504} step=148000
2025-12-06 17:15.23 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_148000.d3


Epoch 149/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.68it/s, vae_loss=0.011]


2025-12-06 17:15.37 [info     ] PLAS_20251206164120: epoch=149 step=149000 epoch=149 metrics={'time_sample_batch': 0.001925973415374756, 'time_algorithm_update': 0.007490358114242554, 'vae_loss': 0.010997502425219864, 'time_step': 0.00964816665649414, 'td_error': 0.4490361390362801, 'value_scale': 0.04528784570662806, 'discounted_advantage': 0.0017830260547353509, 'initial_state': 0.06840485334396362, 'diff_eval': 444.49554412756976} step=149000
2025-12-06 17:15.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_149000.d3


Epoch 150/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.06it/s, vae_loss=0.0107]


2025-12-06 17:15.51 [info     ] PLAS_20251206164120: epoch=150 step=150000 epoch=150 metrics={'time_sample_batch': 0.0019449143409729004, 'time_algorithm_update': 0.007526784420013428, 'vae_loss': 0.010696335760876537, 'time_step': 0.00970601749420166, 'td_error': 0.44926391783377906, 'value_scale': 0.045538644301287415, 'discounted_advantage': 0.0008014450052407351, 'initial_state': 0.06877947598695755, 'diff_eval': 379.9343103687875} step=150000
2025-12-06 17:15.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_150000.d3


Epoch 151/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.70it/s, vae_loss=0.0106]


2025-12-06 17:16.04 [info     ] PLAS_20251206164120: epoch=151 step=151000 epoch=151 metrics={'time_sample_batch': 0.001909616231918335, 'time_algorithm_update': 0.007425381183624268, 'vae_loss': 0.010590468710754067, 'time_step': 0.009559515953063966, 'td_error': 0.44908755691891183, 'value_scale': 0.045282798442577404, 'discounted_advantage': 0.001431192409242288, 'initial_state': 0.06802316755056381, 'diff_eval': 411.0188250209065} step=151000
2025-12-06 17:16.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_151000.d3


Epoch 152/200: 100%|██████████| 1000/1000 [00:10<00:00, 96.98it/s, vae_loss=0.0106]


2025-12-06 17:16.19 [info     ] PLAS_20251206164120: epoch=152 step=152000 epoch=152 metrics={'time_sample_batch': 0.0020727858543395997, 'time_algorithm_update': 0.007797878742218017, 'vae_loss': 0.010551480221096426, 'time_step': 0.010109020948410034, 'td_error': 0.44933128610707085, 'value_scale': 0.04559581208433909, 'discounted_advantage': -9.561754930294737e-05, 'initial_state': 0.06851379573345184, 'diff_eval': 362.884515419776} step=152000
2025-12-06 17:16.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_152000.d3


Epoch 153/200: 100%|██████████| 1000/1000 [00:10<00:00, 99.56it/s, vae_loss=0.0107] 


2025-12-06 17:16.32 [info     ] PLAS_20251206164120: epoch=153 step=153000 epoch=153 metrics={'time_sample_batch': 0.001966837406158447, 'time_algorithm_update': 0.007612884283065796, 'vae_loss': 0.010654953962191939, 'time_step': 0.009831460237503052, 'td_error': 0.44923233405664836, 'value_scale': 0.04552778623036397, 'discounted_advantage': 0.0012527980638428909, 'initial_state': 0.06919635087251663, 'diff_eval': 372.54104994833614} step=153000
2025-12-06 17:16.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_153000.d3


Epoch 154/200: 100%|██████████| 1000/1000 [00:10<00:00, 95.79it/s, vae_loss=0.0107]


2025-12-06 17:16.47 [info     ] PLAS_20251206164120: epoch=154 step=154000 epoch=154 metrics={'time_sample_batch': 0.0020062196254730224, 'time_algorithm_update': 0.008000462532043458, 'vae_loss': 0.010709972248412669, 'time_step': 0.010245794296264649, 'td_error': 0.44903091674447837, 'value_scale': 0.04522118691123916, 'discounted_advantage': 0.001597074012227136, 'initial_state': 0.06811285018920898, 'diff_eval': 479.7496167534167} step=154000
2025-12-06 17:16.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_154000.d3


Epoch 155/200: 100%|██████████| 1000/1000 [00:10<00:00, 96.92it/s, vae_loss=0.0106]


2025-12-06 17:17.01 [info     ] PLAS_20251206164120: epoch=155 step=155000 epoch=155 metrics={'time_sample_batch': 0.0020619733333587646, 'time_algorithm_update': 0.007780475378036499, 'vae_loss': 0.010562647955492138, 'time_step': 0.010110360622406006, 'td_error': 0.4491783380705931, 'value_scale': 0.045471619248658744, 'discounted_advantage': 0.00070227546157362, 'initial_state': 0.06865324079990387, 'diff_eval': 359.1562248459289} step=155000
2025-12-06 17:17.01 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_155000.d3


Epoch 156/200: 100%|██████████| 1000/1000 [00:09<00:00, 100.79it/s, vae_loss=0.0107]


2025-12-06 17:17.15 [info     ] PLAS_20251206164120: epoch=156 step=156000 epoch=156 metrics={'time_sample_batch': 0.001908996820449829, 'time_algorithm_update': 0.007606320858001709, 'vae_loss': 0.01069339504558593, 'time_step': 0.009738131046295166, 'td_error': 0.4493958605347755, 'value_scale': 0.045712188834344775, 'discounted_advantage': 0.000115887802433201, 'initial_state': 0.06917665153741837, 'diff_eval': 380.03618367418943} step=156000
2025-12-06 17:17.15 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_156000.d3


Epoch 157/200: 100%|██████████| 1000/1000 [00:09<00:00, 104.11it/s, vae_loss=0.0105]


2025-12-06 17:17.28 [info     ] PLAS_20251206164120: epoch=157 step=157000 epoch=157 metrics={'time_sample_batch': 0.0018660829067230224, 'time_algorithm_update': 0.007341377496719361, 'vae_loss': 0.010479799373541028, 'time_step': 0.0094327073097229, 'td_error': 0.44916319925829334, 'value_scale': 0.04542333778299935, 'discounted_advantage': 0.0005163475721526115, 'initial_state': 0.06808155030012131, 'diff_eval': 408.2460293894895} step=157000
2025-12-06 17:17.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_157000.d3


Epoch 158/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.79it/s, vae_loss=0.0104]


2025-12-06 17:17.42 [info     ] PLAS_20251206164120: epoch=158 step=158000 epoch=158 metrics={'time_sample_batch': 0.0018807940483093262, 'time_algorithm_update': 0.007427255392074585, 'vae_loss': 0.01036588175734505, 'time_step': 0.009546197652816772, 'td_error': 0.44929935255831244, 'value_scale': 0.04562767199962732, 'discounted_advantage': 0.000493327943963862, 'initial_state': 0.06894664466381073, 'diff_eval': 352.0711296314405} step=158000
2025-12-06 17:17.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_158000.d3


Epoch 159/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.44it/s, vae_loss=0.0103]


2025-12-06 17:17.55 [info     ] PLAS_20251206164120: epoch=159 step=159000 epoch=159 metrics={'time_sample_batch': 0.001954073429107666, 'time_algorithm_update': 0.007498746633529663, 'vae_loss': 0.010286062257830053, 'time_step': 0.009680545568466186, 'td_error': 0.44932480286369775, 'value_scale': 0.045639233347902826, 'discounted_advantage': 0.00030812199362885835, 'initial_state': 0.0689038336277008, 'diff_eval': 373.72142383211764} step=159000
2025-12-06 17:17.55 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_159000.d3


Epoch 160/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.77it/s, vae_loss=0.0104]


2025-12-06 17:18.09 [info     ] PLAS_20251206164120: epoch=160 step=160000 epoch=160 metrics={'time_sample_batch': 0.0018892042636871338, 'time_algorithm_update': 0.007368973731994629, 'vae_loss': 0.010394301280844957, 'time_step': 0.009475042819976806, 'td_error': 0.4491472081980065, 'value_scale': 0.0453673096249553, 'discounted_advantage': 0.0012168510301569845, 'initial_state': 0.06825008243322372, 'diff_eval': 376.6133415516706} step=160000
2025-12-06 17:18.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_160000.d3


Epoch 161/200: 100%|██████████| 1000/1000 [00:09<00:00, 100.66it/s, vae_loss=0.0103]


2025-12-06 17:18.23 [info     ] PLAS_20251206164120: epoch=161 step=161000 epoch=161 metrics={'time_sample_batch': 0.0019138545989990233, 'time_algorithm_update': 0.007595211982727051, 'vae_loss': 0.010275779333431274, 'time_step': 0.009747709512710572, 'td_error': 0.4492374819434841, 'value_scale': 0.04552624494927152, 'discounted_advantage': 0.0001046693008309883, 'initial_state': 0.06867890805006027, 'diff_eval': 384.1624777667604} step=161000
2025-12-06 17:18.23 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_161000.d3


Epoch 162/200: 100%|██████████| 1000/1000 [00:10<00:00, 99.32it/s, vae_loss=0.0104] 


2025-12-06 17:18.36 [info     ] PLAS_20251206164120: epoch=162 step=162000 epoch=162 metrics={'time_sample_batch': 0.001968552350997925, 'time_algorithm_update': 0.007684462785720826, 'vae_loss': 0.010351518128300086, 'time_step': 0.009878814220428467, 'td_error': 0.4491943026423984, 'value_scale': 0.04542153154844098, 'discounted_advantage': 0.0010031115495329924, 'initial_state': 0.0681929811835289, 'diff_eval': 364.2843847792315} step=162000
2025-12-06 17:18.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_162000.d3


Epoch 163/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.23it/s, vae_loss=0.01]  


2025-12-06 17:18.50 [info     ] PLAS_20251206164120: epoch=163 step=163000 epoch=163 metrics={'time_sample_batch': 0.0018617265224456787, 'time_algorithm_update': 0.007585462093353272, 'vae_loss': 0.010045914356596767, 'time_step': 0.009686409235000611, 'td_error': 0.4492506197909765, 'value_scale': 0.04556666588065685, 'discounted_advantage': 0.0006327238572888929, 'initial_state': 0.06877823173999786, 'diff_eval': 342.1143764203961} step=163000
2025-12-06 17:18.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_163000.d3


Epoch 164/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.46it/s, vae_loss=0.0101]


2025-12-06 17:19.04 [info     ] PLAS_20251206164120: epoch=164 step=164000 epoch=164 metrics={'time_sample_batch': 0.0019462320804595947, 'time_algorithm_update': 0.007492512702941895, 'vae_loss': 0.010035240132594481, 'time_step': 0.009669702053070068, 'td_error': 0.449116859635839, 'value_scale': 0.04531542139599367, 'discounted_advantage': 0.0012424814591576834, 'initial_state': 0.06819398701190948, 'diff_eval': 389.7361718161674} step=164000
2025-12-06 17:19.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_164000.d3


Epoch 165/200: 100%|██████████| 1000/1000 [00:10<00:00, 95.76it/s, vae_loss=0.01]   


2025-12-06 17:19.18 [info     ] PLAS_20251206164120: epoch=165 step=165000 epoch=165 metrics={'time_sample_batch': 0.0024754416942596434, 'time_algorithm_update': 0.007554913997650146, 'vae_loss': 0.010034723872784526, 'time_step': 0.010261713027954101, 'td_error': 0.44914655312625745, 'value_scale': 0.04541844871068453, 'discounted_advantage': 0.001163624847225955, 'initial_state': 0.06844480335712433, 'diff_eval': 360.4868211332778} step=165000
2025-12-06 17:19.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_165000.d3


Epoch 166/200: 100%|██████████| 1000/1000 [00:10<00:00, 98.08it/s, vae_loss=0.01]   


2025-12-06 17:19.32 [info     ] PLAS_20251206164120: epoch=166 step=166000 epoch=166 metrics={'time_sample_batch': 0.002021306276321411, 'time_algorithm_update': 0.007718556165695191, 'vae_loss': 0.010005733322119341, 'time_step': 0.009990724086761474, 'td_error': 0.4491887789388977, 'value_scale': 0.04547761456513182, 'discounted_advantage': 0.0008810379324452571, 'initial_state': 0.06859201192855835, 'diff_eval': 352.0868500043846} step=166000
2025-12-06 17:19.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_166000.d3


Epoch 167/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.29it/s, vae_loss=0.0101]


2025-12-06 17:19.46 [info     ] PLAS_20251206164120: epoch=167 step=167000 epoch=167 metrics={'time_sample_batch': 0.001873471975326538, 'time_algorithm_update': 0.007422719478607178, 'vae_loss': 0.01011931813787669, 'time_step': 0.009562266349792481, 'td_error': 0.4491345455815094, 'value_scale': 0.04538827070580719, 'discounted_advantage': 0.001448624192691553, 'initial_state': 0.06820730119943619, 'diff_eval': 341.7886778213009} step=167000
2025-12-06 17:19.46 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_167000.d3


Epoch 168/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.60it/s, vae_loss=0.00999]


2025-12-06 17:20.00 [info     ] PLAS_20251206164120: epoch=168 step=168000 epoch=168 metrics={'time_sample_batch': 0.001916687250137329, 'time_algorithm_update': 0.007477533102035522, 'vae_loss': 0.009975204541813582, 'time_step': 0.00964760947227478, 'td_error': 0.4493076860297949, 'value_scale': 0.045588149599752825, 'discounted_advantage': 0.0006791046045254572, 'initial_state': 0.06855901330709457, 'diff_eval': 359.232516906175} step=168000
2025-12-06 17:20.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_168000.d3


Epoch 169/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.19it/s, vae_loss=0.00988]


2025-12-06 17:20.13 [info     ] PLAS_20251206164120: epoch=169 step=169000 epoch=169 metrics={'time_sample_batch': 0.0019427213668823241, 'time_algorithm_update': 0.0075043303966522215, 'vae_loss': 0.00988252020557411, 'time_step': 0.009684922456741333, 'td_error': 0.44925305127757026, 'value_scale': 0.04551058535542954, 'discounted_advantage': 0.0006038969035260462, 'initial_state': 0.06874766945838928, 'diff_eval': 334.48912049973563} step=169000
2025-12-06 17:20.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_169000.d3


Epoch 170/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.47it/s, vae_loss=0.01] 


2025-12-06 17:20.27 [info     ] PLAS_20251206164120: epoch=170 step=170000 epoch=170 metrics={'time_sample_batch': 0.0018938496112823487, 'time_algorithm_update': 0.007449793815612793, 'vae_loss': 0.010042173106689006, 'time_step': 0.009571534156799316, 'td_error': 0.44931064380201535, 'value_scale': 0.04557201282136074, 'discounted_advantage': 0.0006527910810497249, 'initial_state': 0.06908882409334183, 'diff_eval': 369.48557600986743} step=170000
2025-12-06 17:20.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_170000.d3


Epoch 171/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.42it/s, vae_loss=0.00983]


2025-12-06 17:20.41 [info     ] PLAS_20251206164120: epoch=171 step=171000 epoch=171 metrics={'time_sample_batch': 0.0018806343078613282, 'time_algorithm_update': 0.007580472946166992, 'vae_loss': 0.009825948311947287, 'time_step': 0.009683223247528076, 'td_error': 0.4491174130964716, 'value_scale': 0.04529255779291451, 'discounted_advantage': 0.0016343889183713862, 'initial_state': 0.0679529681801796, 'diff_eval': 360.19808159400577} step=171000
2025-12-06 17:20.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_171000.d3


Epoch 172/200: 100%|██████████| 1000/1000 [00:10<00:00, 98.70it/s, vae_loss=0.00983]


2025-12-06 17:20.55 [info     ] PLAS_20251206164120: epoch=172 step=172000 epoch=172 metrics={'time_sample_batch': 0.0020137836933135987, 'time_algorithm_update': 0.0076492996215820315, 'vae_loss': 0.009820233874954284, 'time_step': 0.009919815540313721, 'td_error': 0.4490764096978734, 'value_scale': 0.04527826680335757, 'discounted_advantage': 0.0008084481246718064, 'initial_state': 0.06786186248064041, 'diff_eval': 490.7761103966825} step=172000
2025-12-06 17:20.55 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_172000.d3


Epoch 173/200: 100%|██████████| 1000/1000 [00:09<00:00, 100.73it/s, vae_loss=0.00981]


2025-12-06 17:21.08 [info     ] PLAS_20251206164120: epoch=173 step=173000 epoch=173 metrics={'time_sample_batch': 0.0019500765800476074, 'time_algorithm_update': 0.007500980138778686, 'vae_loss': 0.009813051076605916, 'time_step': 0.00972122573852539, 'td_error': 0.44898858960496596, 'value_scale': 0.0452247157806885, 'discounted_advantage': 0.001400363414379072, 'initial_state': 0.06802395731210709, 'diff_eval': 421.5186370847183} step=173000
2025-12-06 17:21.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_173000.d3


Epoch 174/200: 100%|██████████| 1000/1000 [00:09<00:00, 100.70it/s, vae_loss=0.00974]


2025-12-06 17:21.22 [info     ] PLAS_20251206164120: epoch=174 step=174000 epoch=174 metrics={'time_sample_batch': 0.0019201531410217285, 'time_algorithm_update': 0.007585912466049194, 'vae_loss': 0.009724190714769066, 'time_step': 0.009746448278427125, 'td_error': 0.44923614887705254, 'value_scale': 0.04563315840920454, 'discounted_advantage': 0.00045634982184948825, 'initial_state': 0.06920931488275528, 'diff_eval': 316.0084531221253} step=174000
2025-12-06 17:21.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_174000.d3


Epoch 175/200: 100%|██████████| 1000/1000 [00:09<00:00, 100.56it/s, vae_loss=0.00952]


2025-12-06 17:21.36 [info     ] PLAS_20251206164120: epoch=175 step=175000 epoch=175 metrics={'time_sample_batch': 0.00206063175201416, 'time_algorithm_update': 0.007484504699707031, 'vae_loss': 0.009515075593953952, 'time_step': 0.009756782293319702, 'td_error': 0.4491099269111318, 'value_scale': 0.04526065639942285, 'discounted_advantage': 0.0010772316819860287, 'initial_state': 0.06754276156425476, 'diff_eval': 450.08601118684106} step=175000
2025-12-06 17:21.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_175000.d3


Epoch 176/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.42it/s, vae_loss=0.00968]


2025-12-06 17:21.50 [info     ] PLAS_20251206164120: epoch=176 step=176000 epoch=176 metrics={'time_sample_batch': 0.0019260485172271728, 'time_algorithm_update': 0.007379362583160401, 'vae_loss': 0.009666068002115936, 'time_step': 0.009559874057769775, 'td_error': 0.4491305069036737, 'value_scale': 0.04537154668676671, 'discounted_advantage': 0.0013235316476387035, 'initial_state': 0.06822232902050018, 'diff_eval': 335.6344449456584} step=176000
2025-12-06 17:21.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_176000.d3


Epoch 177/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.38it/s, vae_loss=0.00987]


2025-12-06 17:22.04 [info     ] PLAS_20251206164120: epoch=177 step=177000 epoch=177 metrics={'time_sample_batch': 0.0019163715839385987, 'time_algorithm_update': 0.007420681715011597, 'vae_loss': 0.009864907763898373, 'time_step': 0.00958272409439087, 'td_error': 0.449374977295742, 'value_scale': 0.04570267749372604, 'discounted_advantage': 0.0005100418437100904, 'initial_state': 0.06953562051057816, 'diff_eval': 342.93213363330705} step=177000
2025-12-06 17:22.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_177000.d3


Epoch 178/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.29it/s, vae_loss=0.00954]


2025-12-06 17:22.18 [info     ] PLAS_20251206164120: epoch=178 step=178000 epoch=178 metrics={'time_sample_batch': 0.0019092819690704347, 'time_algorithm_update': 0.007427801370620727, 'vae_loss': 0.009532951569883152, 'time_step': 0.00957858920097351, 'td_error': 0.44914401057830544, 'value_scale': 0.04541404998009558, 'discounted_advantage': 0.0011689617954002015, 'initial_state': 0.06874984502792358, 'diff_eval': 347.0638748682918} step=178000
2025-12-06 17:22.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_178000.d3


Epoch 179/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.64it/s, vae_loss=0.0093]


2025-12-06 17:22.32 [info     ] PLAS_20251206164120: epoch=179 step=179000 epoch=179 metrics={'time_sample_batch': 0.001915600299835205, 'time_algorithm_update': 0.007494752883911133, 'vae_loss': 0.009294249762780964, 'time_step': 0.009652825593948365, 'td_error': 0.4490939977961881, 'value_scale': 0.045312808763722935, 'discounted_advantage': 0.0014290881241652152, 'initial_state': 0.0680912509560585, 'diff_eval': 356.15374320119014} step=179000
2025-12-06 17:22.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_179000.d3


Epoch 180/200: 100%|██████████| 1000/1000 [00:10<00:00, 96.86it/s, vae_loss=0.0095] 


2025-12-06 17:22.46 [info     ] PLAS_20251206164120: epoch=180 step=180000 epoch=180 metrics={'time_sample_batch': 0.002224201679229736, 'time_algorithm_update': 0.007670339822769165, 'vae_loss': 0.009501508070388808, 'time_step': 0.010126356840133667, 'td_error': 0.44913415316197675, 'value_scale': 0.04540088428894787, 'discounted_advantage': 0.001329846278698456, 'initial_state': 0.06852192431688309, 'diff_eval': 329.0015017363574} step=180000
2025-12-06 17:22.46 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_180000.d3


Epoch 181/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.50it/s, vae_loss=0.00938]


2025-12-06 17:22.59 [info     ] PLAS_20251206164120: epoch=181 step=181000 epoch=181 metrics={'time_sample_batch': 0.0018850202560424805, 'time_algorithm_update': 0.00746162748336792, 'vae_loss': 0.009384010198991745, 'time_step': 0.009579208135604858, 'td_error': 0.4491474717874275, 'value_scale': 0.04541297065568752, 'discounted_advantage': 0.000695005418609304, 'initial_state': 0.0684296041727066, 'diff_eval': 346.397877098519} step=181000
2025-12-06 17:22.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_181000.d3


Epoch 182/200: 100%|██████████| 1000/1000 [00:09<00:00, 100.69it/s, vae_loss=0.00939]


2025-12-06 17:23.13 [info     ] PLAS_20251206164120: epoch=182 step=182000 epoch=182 metrics={'time_sample_batch': 0.0019486370086669923, 'time_algorithm_update': 0.007503875255584717, 'vae_loss': 0.00936730819242075, 'time_step': 0.00972171688079834, 'td_error': 0.4491497822709629, 'value_scale': 0.04540166913587147, 'discounted_advantage': 0.0015339041070307248, 'initial_state': 0.068874791264534, 'diff_eval': 341.09047229832845} step=182000
2025-12-06 17:23.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_182000.d3


Epoch 183/200: 100%|██████████| 1000/1000 [00:10<00:00, 99.54it/s, vae_loss=0.00922]


2025-12-06 17:23.27 [info     ] PLAS_20251206164120: epoch=183 step=183000 epoch=183 metrics={'time_sample_batch': 0.002018017292022705, 'time_algorithm_update': 0.007606367349624634, 'vae_loss': 0.00921481443499215, 'time_step': 0.009864171028137208, 'td_error': 0.4491851872805153, 'value_scale': 0.04543458394493911, 'discounted_advantage': 0.0011353266769522574, 'initial_state': 0.0686631128191948, 'diff_eval': 320.23243204716226} step=183000
2025-12-06 17:23.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_183000.d3


Epoch 184/200: 100%|██████████| 1000/1000 [00:09<00:00, 104.95it/s, vae_loss=0.0092]


2025-12-06 17:23.40 [info     ] PLAS_20251206164120: epoch=184 step=184000 epoch=184 metrics={'time_sample_batch': 0.001841465711593628, 'time_algorithm_update': 0.007274493932723999, 'vae_loss': 0.009200441237073392, 'time_step': 0.009348851442337037, 'td_error': 0.449406759589741, 'value_scale': 0.04575004579135578, 'discounted_advantage': -0.00010328322063460918, 'initial_state': 0.06951624900102615, 'diff_eval': 323.00112894546606} step=184000
2025-12-06 17:23.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_184000.d3


Epoch 185/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.30it/s, vae_loss=0.00917]


2025-12-06 17:23.54 [info     ] PLAS_20251206164120: epoch=185 step=185000 epoch=185 metrics={'time_sample_batch': 0.0019097321033477784, 'time_algorithm_update': 0.007453271150588989, 'vae_loss': 0.00917692375741899, 'time_step': 0.00958310627937317, 'td_error': 0.4490276992658592, 'value_scale': 0.04532914085794562, 'discounted_advantage': 0.001332185835731863, 'initial_state': 0.06843580305576324, 'diff_eval': 366.0119719058009} step=185000
2025-12-06 17:23.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_185000.d3


Epoch 186/200: 100%|██████████| 1000/1000 [00:09<00:00, 104.12it/s, vae_loss=0.00915]


2025-12-06 17:24.07 [info     ] PLAS_20251206164120: epoch=186 step=186000 epoch=186 metrics={'time_sample_batch': 0.0018592939376831056, 'time_algorithm_update': 0.007351015567779541, 'vae_loss': 0.009161669285735115, 'time_step': 0.009425031661987304, 'td_error': 0.44929037871161054, 'value_scale': 0.04557672777226427, 'discounted_advantage': 0.0005852522749757968, 'initial_state': 0.06858938932418823, 'diff_eval': 289.8436437336988} step=186000
2025-12-06 17:24.07 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_186000.d3


Epoch 187/200: 100%|██████████| 1000/1000 [00:09<00:00, 106.18it/s, vae_loss=0.00903]


2025-12-06 17:24.20 [info     ] PLAS_20251206164120: epoch=187 step=187000 epoch=187 metrics={'time_sample_batch': 0.0018091638088226318, 'time_algorithm_update': 0.007202574729919434, 'vae_loss': 0.009047178022563458, 'time_step': 0.009235092878341674, 'td_error': 0.4490542018426465, 'value_scale': 0.04532650502848268, 'discounted_advantage': 0.0009242681451225074, 'initial_state': 0.06817052513360977, 'diff_eval': 380.81585294071164} step=187000
2025-12-06 17:24.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_187000.d3


Epoch 188/200: 100%|██████████| 1000/1000 [00:09<00:00, 104.32it/s, vae_loss=0.00908]


2025-12-06 17:24.34 [info     ] PLAS_20251206164120: epoch=188 step=188000 epoch=188 metrics={'time_sample_batch': 0.0018799641132354735, 'time_algorithm_update': 0.007313764333724976, 'vae_loss': 0.009098295867443085, 'time_step': 0.009418722629547118, 'td_error': 0.4491592619388948, 'value_scale': 0.045422821236213815, 'discounted_advantage': 0.0010595347943994145, 'initial_state': 0.06827054917812347, 'diff_eval': 292.01394168594703} step=188000
2025-12-06 17:24.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_188000.d3


Epoch 189/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.66it/s, vae_loss=0.00897]


2025-12-06 17:24.48 [info     ] PLAS_20251206164120: epoch=189 step=189000 epoch=189 metrics={'time_sample_batch': 0.0018677947521209717, 'time_algorithm_update': 0.007398518562316894, 'vae_loss': 0.00897956759063527, 'time_step': 0.009537390947341918, 'td_error': 0.449408213791779, 'value_scale': 0.04573976954254763, 'discounted_advantage': -1.965315988699973e-05, 'initial_state': 0.0694265365600586, 'diff_eval': 304.40556606160806} step=189000
2025-12-06 17:24.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_189000.d3


Epoch 190/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.41it/s, vae_loss=0.00908]


2025-12-06 17:25.01 [info     ] PLAS_20251206164120: epoch=190 step=190000 epoch=190 metrics={'time_sample_batch': 0.0019438774585723876, 'time_algorithm_update': 0.007398750305175781, 'vae_loss': 0.009080953991506248, 'time_step': 0.009573727369308472, 'td_error': 0.44905920269133776, 'value_scale': 0.04525236572800181, 'discounted_advantage': 0.001419971280559952, 'initial_state': 0.06781274080276489, 'diff_eval': 366.717904430152} step=190000
2025-12-06 17:25.01 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_190000.d3


Epoch 191/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.47it/s, vae_loss=0.00902]


2025-12-06 17:25.15 [info     ] PLAS_20251206164120: epoch=191 step=191000 epoch=191 metrics={'time_sample_batch': 0.0019313318729400634, 'time_algorithm_update': 0.007419950485229492, 'vae_loss': 0.009031470733927563, 'time_step': 0.009585609436035156, 'td_error': 0.4490602862093325, 'value_scale': 0.045337055154825494, 'discounted_advantage': 0.0016484818333241977, 'initial_state': 0.06843046844005585, 'diff_eval': 332.53272135653543} step=191000
2025-12-06 17:25.15 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_191000.d3


Epoch 192/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.12it/s, vae_loss=0.00894]


2025-12-06 17:25.29 [info     ] PLAS_20251206164120: epoch=192 step=192000 epoch=192 metrics={'time_sample_batch': 0.001897792339324951, 'time_algorithm_update': 0.007524871826171875, 'vae_loss': 0.008934824474854395, 'time_step': 0.00968933367729187, 'td_error': 0.44937729743817256, 'value_scale': 0.04568808655038301, 'discounted_advantage': 0.0003356157366664227, 'initial_state': 0.06899736821651459, 'diff_eval': 285.2137002068369} step=192000
2025-12-06 17:25.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_192000.d3


Epoch 193/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.07it/s, vae_loss=0.00891]


2025-12-06 17:25.42 [info     ] PLAS_20251206164120: epoch=193 step=193000 epoch=193 metrics={'time_sample_batch': 0.0018927416801452638, 'time_algorithm_update': 0.007380743980407715, 'vae_loss': 0.0088922194277402, 'time_step': 0.009518268585205079, 'td_error': 0.44940222477269487, 'value_scale': 0.045757450836289855, 'discounted_advantage': 0.00010315433541154387, 'initial_state': 0.0692586898803711, 'diff_eval': 289.7114310034683} step=193000
2025-12-06 17:25.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_193000.d3


Epoch 194/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.52it/s, vae_loss=0.0089]


2025-12-06 17:25.56 [info     ] PLAS_20251206164120: epoch=194 step=194000 epoch=194 metrics={'time_sample_batch': 0.0018870656490325927, 'time_algorithm_update': 0.007424585103988648, 'vae_loss': 0.008883410530164839, 'time_step': 0.009557199954986573, 'td_error': 0.4491580732635118, 'value_scale': 0.045395934033111575, 'discounted_advantage': 0.001171656764941264, 'initial_state': 0.06843607872724533, 'diff_eval': 298.45098425265377} step=194000
2025-12-06 17:25.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_194000.d3


Epoch 195/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.50it/s, vae_loss=0.0087]


2025-12-06 17:26.10 [info     ] PLAS_20251206164120: epoch=195 step=195000 epoch=195 metrics={'time_sample_batch': 0.0019181804656982421, 'time_algorithm_update': 0.007532075405120849, 'vae_loss': 0.008709598227404058, 'time_step': 0.009676644086837768, 'td_error': 0.4495666734340399, 'value_scale': 0.04589544083261105, 'discounted_advantage': -0.0007743138884869443, 'initial_state': 0.06941535323858261, 'diff_eval': 299.8772212292687} step=195000
2025-12-06 17:26.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_195000.d3


Epoch 196/200: 100%|██████████| 1000/1000 [00:09<00:00, 104.94it/s, vae_loss=0.00877]


2025-12-06 17:26.23 [info     ] PLAS_20251206164120: epoch=196 step=196000 epoch=196 metrics={'time_sample_batch': 0.001848691463470459, 'time_algorithm_update': 0.007281007528305053, 'vae_loss': 0.008757413755171, 'time_step': 0.009349757671356202, 'td_error': 0.44898962849665586, 'value_scale': 0.045237618012936145, 'discounted_advantage': 0.0018445807099626223, 'initial_state': 0.06809142231941223, 'diff_eval': 347.7058919154431} step=196000
2025-12-06 17:26.23 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_196000.d3


Epoch 197/200: 100%|██████████| 1000/1000 [00:10<00:00, 95.26it/s, vae_loss=0.00878]


2025-12-06 17:26.37 [info     ] PLAS_20251206164120: epoch=197 step=197000 epoch=197 metrics={'time_sample_batch': 0.0021387639045715333, 'time_algorithm_update': 0.007862163305282593, 'vae_loss': 0.008783449644222856, 'time_step': 0.010277360677719116, 'td_error': 0.4491124482686025, 'value_scale': 0.04537618877631608, 'discounted_advantage': 0.0012020537694287727, 'initial_state': 0.06810832023620605, 'diff_eval': 300.4356639211356} step=197000
2025-12-06 17:26.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_197000.d3


Epoch 198/200: 100%|██████████| 1000/1000 [00:10<00:00, 98.77it/s, vae_loss=0.00881]


2025-12-06 17:26.51 [info     ] PLAS_20251206164120: epoch=198 step=198000 epoch=198 metrics={'time_sample_batch': 0.0019747273921966553, 'time_algorithm_update': 0.00770821738243103, 'vae_loss': 0.008808940114919096, 'time_step': 0.009921703100204468, 'td_error': 0.4491863922328151, 'value_scale': 0.045444552975567015, 'discounted_advantage': 0.0009410357119314452, 'initial_state': 0.06868964433670044, 'diff_eval': 304.9627574736965} step=198000
2025-12-06 17:26.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_198000.d3


Epoch 199/200: 100%|██████████| 1000/1000 [00:09<00:00, 104.15it/s, vae_loss=0.00868]


2025-12-06 17:27.05 [info     ] PLAS_20251206164120: epoch=199 step=199000 epoch=199 metrics={'time_sample_batch': 0.0018954482078552247, 'time_algorithm_update': 0.007302285194396973, 'vae_loss': 0.008663990756729618, 'time_step': 0.009420711278915405, 'td_error': 0.4491844689902205, 'value_scale': 0.04550804427652389, 'discounted_advantage': 0.0009565797733502754, 'initial_state': 0.06887482851743698, 'diff_eval': 266.67748919242763} step=199000
2025-12-06 17:27.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_199000.d3


Epoch 200/200: 100%|██████████| 1000/1000 [00:10<00:00, 99.25it/s, vae_loss=0.0087] 


2025-12-06 17:27.19 [info     ] PLAS_20251206164120: epoch=200 step=200000 epoch=200 metrics={'time_sample_batch': 0.001929821014404297, 'time_algorithm_update': 0.007700068712234497, 'vae_loss': 0.008687936262693256, 'time_step': 0.009874632358551025, 'td_error': 0.44905643659793026, 'value_scale': 0.04529101729364902, 'discounted_advantage': 0.0016605674226257648, 'initial_state': 0.06825067847967148, 'diff_eval': 316.6627033275151} step=200000
2025-12-06 17:27.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLAS_20251206164120\model_200000.d3
Training model:  PLASWithPerturbation
2025-12-06 17:27.19 [info     ] dataset info                   dataset_info=DatasetInfo(observation_signature=Signature(dtype=[dtype('float64')], shape=[(7,)]), action_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.CONTINUOUS: 1>, action_size=1)
2025-12-06 17:27.19 [debug    ] Building m

Epoch 1/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.76it/s, vae_loss=0.0658]


2025-12-06 17:27.33 [info     ] PLASWithPerturbation_20251206172719: epoch=1 step=1000 epoch=1 metrics={'time_sample_batch': 0.00194205379486084, 'time_algorithm_update': 0.007387218475341797, 'vae_loss': 0.06561813170276583, 'time_step': 0.009552509069442749, 'td_error': 0.45996623747003335, 'value_scale': -0.04854637226019576, 'discounted_advantage': 0.0030510556606891197, 'initial_state': -0.043254103511571884, 'diff_eval': 2725.351809240274} step=1000
2025-12-06 17:27.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_1000.d3


Epoch 2/200: 100%|██████████| 1000/1000 [00:10<00:00, 99.76it/s, vae_loss=0.0413]


2025-12-06 17:27.47 [info     ] PLASWithPerturbation_20251206172719: epoch=2 step=2000 epoch=2 metrics={'time_sample_batch': 0.0020196797847747802, 'time_algorithm_update': 0.007576717138290405, 'vae_loss': 0.04133612912148237, 'time_step': 0.009834401369094848, 'td_error': 0.4612374018065897, 'value_scale': -0.04739649553196965, 'discounted_advantage': -0.003464876831126822, 'initial_state': -0.04256260767579079, 'diff_eval': 2573.3770648634422} step=2000
2025-12-06 17:27.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_2000.d3


Epoch 3/200: 100%|██████████| 1000/1000 [00:09<00:00, 105.36it/s, vae_loss=0.0393]


2025-12-06 17:28.01 [info     ] PLASWithPerturbation_20251206172719: epoch=3 step=3000 epoch=3 metrics={'time_sample_batch': 0.001865854263305664, 'time_algorithm_update': 0.0072367236614227295, 'vae_loss': 0.03931001922301948, 'time_step': 0.009318382024765015, 'td_error': 0.46074709295513655, 'value_scale': -0.04781989489852982, 'discounted_advantage': -0.0022189657618070613, 'initial_state': -0.04277176037430763, 'diff_eval': 2462.404493241042} step=3000
2025-12-06 17:28.01 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_3000.d3


Epoch 4/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.45it/s, vae_loss=0.0377]


2025-12-06 17:28.15 [info     ] PLASWithPerturbation_20251206172719: epoch=4 step=4000 epoch=4 metrics={'time_sample_batch': 0.001932891845703125, 'time_algorithm_update': 0.007418737649917602, 'vae_loss': 0.03773125542327762, 'time_step': 0.009578060388565063, 'td_error': 0.46037988766409327, 'value_scale': -0.04809605224176692, 'discounted_advantage': -0.0003804705207104094, 'initial_state': -0.043087366968393326, 'diff_eval': 2348.2442870532927} step=4000
2025-12-06 17:28.15 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_4000.d3


Epoch 5/200: 100%|██████████| 1000/1000 [00:10<00:00, 99.51it/s, vae_loss=0.0363]


2025-12-06 17:28.30 [info     ] PLASWithPerturbation_20251206172719: epoch=5 step=5000 epoch=5 metrics={'time_sample_batch': 0.0019958436489105223, 'time_algorithm_update': 0.007642771005630493, 'vae_loss': 0.036233834939077496, 'time_step': 0.009866125822067261, 'td_error': 0.4602253150728116, 'value_scale': -0.04818189178912745, 'discounted_advantage': -0.00021157149377234666, 'initial_state': -0.04311292618513107, 'diff_eval': 2229.463279570865} step=5000
2025-12-06 17:28.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_5000.d3


Epoch 6/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.00it/s, vae_loss=0.0361]


2025-12-06 17:28.44 [info     ] PLASWithPerturbation_20251206172719: epoch=6 step=6000 epoch=6 metrics={'time_sample_batch': 0.001950455904006958, 'time_algorithm_update': 0.007542680501937866, 'vae_loss': 0.036070641627535226, 'time_step': 0.00972492480278015, 'td_error': 0.46027303072913395, 'value_scale': -0.04809755926494302, 'discounted_advantage': 1.2367737501464336e-05, 'initial_state': -0.042852457612752914, 'diff_eval': 2070.6975810388008} step=6000
2025-12-06 17:28.44 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_6000.d3


Epoch 7/200: 100%|██████████| 1000/1000 [00:09<00:00, 100.82it/s, vae_loss=0.0352]


2025-12-06 17:28.59 [info     ] PLASWithPerturbation_20251206172719: epoch=7 step=7000 epoch=7 metrics={'time_sample_batch': 0.001962801933288574, 'time_algorithm_update': 0.007499860286712646, 'vae_loss': 0.03515174402855337, 'time_step': 0.00971780252456665, 'td_error': 0.46003862519183203, 'value_scale': -0.04832850121373852, 'discounted_advantage': 0.0019303458333169944, 'initial_state': -0.04278344288468361, 'diff_eval': 1955.4261986658848} step=7000
2025-12-06 17:28.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_7000.d3


Epoch 8/200: 100%|██████████| 1000/1000 [00:10<00:00, 97.91it/s, vae_loss=0.0338]


2025-12-06 17:29.13 [info     ] PLASWithPerturbation_20251206172719: epoch=8 step=8000 epoch=8 metrics={'time_sample_batch': 0.0020418453216552736, 'time_algorithm_update': 0.007736823797225952, 'vae_loss': 0.03373859604820609, 'time_step': 0.010018331527709961, 'td_error': 0.45942083346459656, 'value_scale': -0.04882944114498438, 'discounted_advantage': 0.004819112924373848, 'initial_state': -0.04329274222254753, 'diff_eval': 1797.4083371890938} step=8000
2025-12-06 17:29.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_8000.d3


Epoch 9/200: 100%|██████████| 1000/1000 [00:10<00:00, 99.09it/s, vae_loss=0.0333]


2025-12-06 17:29.28 [info     ] PLASWithPerturbation_20251206172719: epoch=9 step=9000 epoch=9 metrics={'time_sample_batch': 0.002005126237869263, 'time_algorithm_update': 0.007663731575012207, 'vae_loss': 0.03326492758840322, 'time_step': 0.009902013301849364, 'td_error': 0.46037883970125676, 'value_scale': -0.04792755730575058, 'discounted_advantage': -0.001438264831332917, 'initial_state': -0.042753301560878754, 'diff_eval': 1631.9389397295072} step=9000
2025-12-06 17:29.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_9000.d3


Epoch 10/200: 100%|██████████| 1000/1000 [00:10<00:00, 95.31it/s, vae_loss=0.0319]


2025-12-06 17:29.43 [info     ] PLASWithPerturbation_20251206172719: epoch=10 step=10000 epoch=10 metrics={'time_sample_batch': 0.0021530323028564453, 'time_algorithm_update': 0.007868322849273681, 'vae_loss': 0.03186919697932899, 'time_step': 0.010285192966461182, 'td_error': 0.459577491972511, 'value_scale': -0.048634556655312224, 'discounted_advantage': 0.002852900402887931, 'initial_state': -0.04292341321706772, 'diff_eval': 1554.3737988277094} step=10000
2025-12-06 17:29.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_10000.d3


Epoch 11/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.38it/s, vae_loss=0.0309]


2025-12-06 17:29.58 [info     ] PLASWithPerturbation_20251206172719: epoch=11 step=11000 epoch=11 metrics={'time_sample_batch': 0.002004392862319946, 'time_algorithm_update': 0.00740077018737793, 'vae_loss': 0.030914883543737233, 'time_step': 0.00966076374053955, 'td_error': 0.4595211819895489, 'value_scale': -0.048606293123334095, 'discounted_advantage': 0.0020889701569727125, 'initial_state': -0.04306669160723686, 'diff_eval': 1411.878924949417} step=11000
2025-12-06 17:29.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_11000.d3


Epoch 12/200: 100%|██████████| 1000/1000 [00:10<00:00, 98.70it/s, vae_loss=0.0304]


2025-12-06 17:30.12 [info     ] PLASWithPerturbation_20251206172719: epoch=12 step=12000 epoch=12 metrics={'time_sample_batch': 0.001944143295288086, 'time_algorithm_update': 0.007730804681777954, 'vae_loss': 0.030401310643181204, 'time_step': 0.009928573608398437, 'td_error': 0.4598378858483593, 'value_scale': -0.048350793747067425, 'discounted_advantage': 0.0018982461837381594, 'initial_state': -0.04275311529636383, 'diff_eval': 1379.6446049013955} step=12000
2025-12-06 17:30.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_12000.d3


Epoch 13/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.86it/s, vae_loss=0.0292]


2025-12-06 17:30.26 [info     ] PLASWithPerturbation_20251206172719: epoch=13 step=13000 epoch=13 metrics={'time_sample_batch': 0.0019094390869140625, 'time_algorithm_update': 0.007373098611831665, 'vae_loss': 0.02915151980891824, 'time_step': 0.009523914813995362, 'td_error': 0.45927900000935673, 'value_scale': -0.048798214454884574, 'discounted_advantage': 0.005053342854144837, 'initial_state': -0.04288557916879654, 'diff_eval': 1288.9228403865397} step=13000
2025-12-06 17:30.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_13000.d3


Epoch 14/200: 100%|██████████| 1000/1000 [00:10<00:00, 97.68it/s, vae_loss=0.0288]


2025-12-06 17:30.41 [info     ] PLASWithPerturbation_20251206172719: epoch=14 step=14000 epoch=14 metrics={'time_sample_batch': 0.0020421352386474608, 'time_algorithm_update': 0.007748359680175781, 'vae_loss': 0.028848773811012505, 'time_step': 0.010031223058700562, 'td_error': 0.4591195841563682, 'value_scale': -0.04896867477818262, 'discounted_advantage': 0.004791779939523519, 'initial_state': -0.043052397668361664, 'diff_eval': 1263.3161567659888} step=14000
2025-12-06 17:30.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_14000.d3


Epoch 15/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.61it/s, vae_loss=0.0287]


2025-12-06 17:30.55 [info     ] PLASWithPerturbation_20251206172719: epoch=15 step=15000 epoch=15 metrics={'time_sample_batch': 0.0019303169250488282, 'time_algorithm_update': 0.0073897731304168705, 'vae_loss': 0.028708335129544137, 'time_step': 0.00955079460144043, 'td_error': 0.46083943911651387, 'value_scale': -0.04741648708572432, 'discounted_advantage': -0.005904852025640138, 'initial_state': -0.042538199573755264, 'diff_eval': 1188.5876086614999} step=15000
2025-12-06 17:30.55 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_15000.d3


Epoch 16/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.12it/s, vae_loss=0.028]


2025-12-06 17:31.09 [info     ] PLASWithPerturbation_20251206172719: epoch=16 step=16000 epoch=16 metrics={'time_sample_batch': 0.0019282383918762208, 'time_algorithm_update': 0.007430485725402832, 'vae_loss': 0.027917359426617622, 'time_step': 0.009595943927764892, 'td_error': 0.45887202074010386, 'value_scale': -0.04924726340379519, 'discounted_advantage': 0.006925453660051591, 'initial_state': -0.043260782957077026, 'diff_eval': 1225.0691678695582} step=16000
2025-12-06 17:31.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_16000.d3


Epoch 17/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.99it/s, vae_loss=0.0269]


2025-12-06 17:31.24 [info     ] PLASWithPerturbation_20251206172719: epoch=17 step=17000 epoch=17 metrics={'time_sample_batch': 0.0019212393760681152, 'time_algorithm_update': 0.00745296311378479, 'vae_loss': 0.02694863400235772, 'time_step': 0.009607501268386841, 'td_error': 0.45910589760133685, 'value_scale': -0.048969495228873464, 'discounted_advantage': 0.0032207090632245384, 'initial_state': -0.0431804358959198, 'diff_eval': 1166.5894829873298} step=17000
2025-12-06 17:31.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_17000.d3


Epoch 18/200: 100%|██████████| 1000/1000 [00:09<00:00, 100.37it/s, vae_loss=0.0265]


2025-12-06 17:31.38 [info     ] PLASWithPerturbation_20251206172719: epoch=18 step=18000 epoch=18 metrics={'time_sample_batch': 0.002009984016418457, 'time_algorithm_update': 0.0075394344329833985, 'vae_loss': 0.0265043202675879, 'time_step': 0.009780557870864869, 'td_error': 0.4587702120123838, 'value_scale': -0.049365615442142276, 'discounted_advantage': 0.008051724655545359, 'initial_state': -0.04324744641780853, 'diff_eval': 1195.660015835948} step=18000
2025-12-06 17:31.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_18000.d3


Epoch 19/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.07it/s, vae_loss=0.0266]


2025-12-06 17:31.54 [info     ] PLASWithPerturbation_20251206172719: epoch=19 step=19000 epoch=19 metrics={'time_sample_batch': 0.0019062705039978027, 'time_algorithm_update': 0.00738600754737854, 'vae_loss': 0.026636267503723502, 'time_step': 0.00952151846885681, 'td_error': 0.45908962308001955, 'value_scale': -0.04900664360865901, 'discounted_advantage': 0.004817569938905467, 'initial_state': -0.043297041207551956, 'diff_eval': 1101.3175634838917} step=19000
2025-12-06 17:31.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_19000.d3


Epoch 20/200: 100%|██████████| 1000/1000 [00:09<00:00, 104.54it/s, vae_loss=0.0257]


2025-12-06 17:32.07 [info     ] PLASWithPerturbation_20251206172719: epoch=20 step=20000 epoch=20 metrics={'time_sample_batch': 0.0018332138061523437, 'time_algorithm_update': 0.007299523830413818, 'vae_loss': 0.02576085793785751, 'time_step': 0.009378501176834107, 'td_error': 0.4597213685870989, 'value_scale': -0.04845554983377619, 'discounted_advantage': 0.0012237021199050086, 'initial_state': -0.04304063692688942, 'diff_eval': 1050.8291961059656} step=20000
2025-12-06 17:32.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_20000.d3


Epoch 21/200: 100%|██████████| 1000/1000 [00:09<00:00, 100.48it/s, vae_loss=0.0256]


2025-12-06 17:32.22 [info     ] PLASWithPerturbation_20251206172719: epoch=21 step=21000 epoch=21 metrics={'time_sample_batch': 0.0019985530376434328, 'time_algorithm_update': 0.007532137632369995, 'vae_loss': 0.025580031550489365, 'time_step': 0.009765748977661132, 'td_error': 0.4602325458990382, 'value_scale': -0.04796248023286587, 'discounted_advantage': -0.002117871386382766, 'initial_state': -0.04266707971692085, 'diff_eval': 1029.5743298596342} step=21000
2025-12-06 17:32.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_21000.d3


Epoch 22/200: 100%|██████████| 1000/1000 [00:09<00:00, 100.41it/s, vae_loss=0.025]


2025-12-06 17:32.36 [info     ] PLASWithPerturbation_20251206172719: epoch=22 step=22000 epoch=22 metrics={'time_sample_batch': 0.0019641141891479494, 'time_algorithm_update': 0.007425502061843872, 'vae_loss': 0.025040026988834143, 'time_step': 0.009739249229431153, 'td_error': 0.4600070965677463, 'value_scale': -0.04819779896957227, 'discounted_advantage': 0.000353718798426579, 'initial_state': -0.043022338300943375, 'diff_eval': 996.3549841916579} step=22000
2025-12-06 17:32.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_22000.d3


Epoch 23/200: 100%|██████████| 1000/1000 [00:09<00:00, 100.37it/s, vae_loss=0.0246]


2025-12-06 17:32.51 [info     ] PLASWithPerturbation_20251206172719: epoch=23 step=23000 epoch=23 metrics={'time_sample_batch': 0.002000941276550293, 'time_algorithm_update': 0.007513477802276612, 'vae_loss': 0.024636771250516176, 'time_step': 0.00976046347618103, 'td_error': 0.46037645412541917, 'value_scale': -0.047877109450680785, 'discounted_advantage': -0.0005579561070011047, 'initial_state': -0.04238804802298546, 'diff_eval': 1025.943915195823} step=23000
2025-12-06 17:32.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_23000.d3


Epoch 24/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.88it/s, vae_loss=0.0246]


2025-12-06 17:33.05 [info     ] PLASWithPerturbation_20251206172719: epoch=24 step=24000 epoch=24 metrics={'time_sample_batch': 0.0019288313388824462, 'time_algorithm_update': 0.00747883129119873, 'vae_loss': 0.024606771419756115, 'time_step': 0.009637272596359252, 'td_error': 0.4587650619727885, 'value_scale': -0.049350458391679, 'discounted_advantage': 0.00657205420445231, 'initial_state': -0.043275002390146255, 'diff_eval': 1048.8109179397072} step=24000
2025-12-06 17:33.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_24000.d3


Epoch 25/200: 100%|██████████| 1000/1000 [00:09<00:00, 100.91it/s, vae_loss=0.0241]


2025-12-06 17:33.19 [info     ] PLASWithPerturbation_20251206172719: epoch=25 step=25000 epoch=25 metrics={'time_sample_batch': 0.0019366579055786132, 'time_algorithm_update': 0.007541618347167969, 'vae_loss': 0.024076114445924758, 'time_step': 0.009721378564834595, 'td_error': 0.46020790331799283, 'value_scale': -0.04799956648765883, 'discounted_advantage': -0.001090456563009595, 'initial_state': -0.04271663725376129, 'diff_eval': 962.1102510513937} step=25000
2025-12-06 17:33.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_25000.d3


Epoch 26/200: 100%|██████████| 1000/1000 [00:09<00:00, 104.30it/s, vae_loss=0.024]


2025-12-06 17:33.33 [info     ] PLASWithPerturbation_20251206172719: epoch=26 step=26000 epoch=26 metrics={'time_sample_batch': 0.001853036880493164, 'time_algorithm_update': 0.007336972713470459, 'vae_loss': 0.02406540989689529, 'time_step': 0.009411722421646118, 'td_error': 0.4589770169632913, 'value_scale': -0.04922324692758623, 'discounted_advantage': 0.006756412064741138, 'initial_state': -0.04328165575861931, 'diff_eval': 1036.8730383594057} step=26000
2025-12-06 17:33.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_26000.d3


Epoch 27/200: 100%|██████████| 1000/1000 [00:09<00:00, 104.32it/s, vae_loss=0.0238]


2025-12-06 17:33.47 [info     ] PLASWithPerturbation_20251206172719: epoch=27 step=27000 epoch=27 metrics={'time_sample_batch': 0.0018833982944488526, 'time_algorithm_update': 0.007296208620071411, 'vae_loss': 0.023804323402233422, 'time_step': 0.009404365062713624, 'td_error': 0.4587985212878733, 'value_scale': -0.04934969306935159, 'discounted_advantage': 0.008870460515173074, 'initial_state': -0.04314453899860382, 'diff_eval': 1061.7209308288943} step=27000
2025-12-06 17:33.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_27000.d3


Epoch 28/200: 100%|██████████| 1000/1000 [00:09<00:00, 104.61it/s, vae_loss=0.023]


2025-12-06 17:34.01 [info     ] PLASWithPerturbation_20251206172719: epoch=28 step=28000 epoch=28 metrics={'time_sample_batch': 0.0018821890354156494, 'time_algorithm_update': 0.007271602630615235, 'vae_loss': 0.023052625425159932, 'time_step': 0.009380531549453735, 'td_error': 0.46027963390433235, 'value_scale': -0.04797488156969956, 'discounted_advantage': -0.00023197967862463494, 'initial_state': -0.04267022758722305, 'diff_eval': 910.0588556735389} step=28000
2025-12-06 17:34.01 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_28000.d3


Epoch 29/200: 100%|██████████| 1000/1000 [00:10<00:00, 99.75it/s, vae_loss=0.0231]


2025-12-06 17:34.16 [info     ] PLASWithPerturbation_20251206172719: epoch=29 step=29000 epoch=29 metrics={'time_sample_batch': 0.002000057935714722, 'time_algorithm_update': 0.0076033749580383305, 'vae_loss': 0.023041902494616805, 'time_step': 0.00983466386795044, 'td_error': 0.45842976022176174, 'value_scale': -0.0496280954089812, 'discounted_advantage': 0.009107805205581531, 'initial_state': -0.04355917498469353, 'diff_eval': 1025.9447402287542} step=29000
2025-12-06 17:34.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_29000.d3


Epoch 30/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.22it/s, vae_loss=0.023]


2025-12-06 17:34.30 [info     ] PLASWithPerturbation_20251206172719: epoch=30 step=30000 epoch=30 metrics={'time_sample_batch': 0.0019142394065856934, 'time_algorithm_update': 0.007375831604003906, 'vae_loss': 0.022949720717966558, 'time_step': 0.009503353118896484, 'td_error': 0.4604099230468868, 'value_scale': -0.04786587699862121, 'discounted_advantage': -0.0014177612478376079, 'initial_state': -0.04250279441475868, 'diff_eval': 930.6903460729028} step=30000
2025-12-06 17:34.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_30000.d3


Epoch 31/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.93it/s, vae_loss=0.0225]


2025-12-06 17:34.44 [info     ] PLASWithPerturbation_20251206172719: epoch=31 step=31000 epoch=31 metrics={'time_sample_batch': 0.0019216961860656738, 'time_algorithm_update': 0.00729117751121521, 'vae_loss': 0.02246323601529002, 'time_step': 0.009435434818267823, 'td_error': 0.46042510931051595, 'value_scale': -0.047805360178029895, 'discounted_advantage': -0.001414635234805163, 'initial_state': -0.04200982674956322, 'diff_eval': 951.105983595717} step=31000
2025-12-06 17:34.44 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_31000.d3


Epoch 32/200: 100%|██████████| 1000/1000 [00:09<00:00, 105.56it/s, vae_loss=0.0221]


2025-12-06 17:34.58 [info     ] PLASWithPerturbation_20251206172719: epoch=32 step=32000 epoch=32 metrics={'time_sample_batch': 0.0018203105926513671, 'time_algorithm_update': 0.00723897647857666, 'vae_loss': 0.02213754371739924, 'time_step': 0.009291138410568237, 'td_error': 0.4598974690391161, 'value_scale': -0.048344489712773976, 'discounted_advantage': 0.001308089026391075, 'initial_state': -0.04305184260010719, 'diff_eval': 888.0526339188717} step=32000
2025-12-06 17:34.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_32000.d3


Epoch 33/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.50it/s, vae_loss=0.0218]


2025-12-06 17:35.12 [info     ] PLASWithPerturbation_20251206172719: epoch=33 step=33000 epoch=33 metrics={'time_sample_batch': 0.001915590524673462, 'time_algorithm_update': 0.007496302843093872, 'vae_loss': 0.02180404567811638, 'time_step': 0.00965779709815979, 'td_error': 0.45944825946880447, 'value_scale': -0.04869787586066007, 'discounted_advantage': 0.004275862547146182, 'initial_state': -0.04296541213989258, 'diff_eval': 909.2306071871225} step=33000
2025-12-06 17:35.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_33000.d3


Epoch 34/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.49it/s, vae_loss=0.022]


2025-12-06 17:35.26 [info     ] PLASWithPerturbation_20251206172719: epoch=34 step=34000 epoch=34 metrics={'time_sample_batch': 0.0019222006797790527, 'time_algorithm_update': 0.007515518188476562, 'vae_loss': 0.021985527319833637, 'time_step': 0.009670804738998413, 'td_error': 0.45952641564196206, 'value_scale': -0.048651549139599515, 'discounted_advantage': 0.00423807066924785, 'initial_state': -0.04247375577688217, 'diff_eval': 929.335258921537} step=34000
2025-12-06 17:35.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_34000.d3


Epoch 35/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.66it/s, vae_loss=0.0214]


2025-12-06 17:35.40 [info     ] PLASWithPerturbation_20251206172719: epoch=35 step=35000 epoch=35 metrics={'time_sample_batch': 0.0018893775939941406, 'time_algorithm_update': 0.007340651512145996, 'vae_loss': 0.02146591547690332, 'time_step': 0.00946367621421814, 'td_error': 0.45884348450616297, 'value_scale': -0.04929700988553283, 'discounted_advantage': 0.007654269384920759, 'initial_state': -0.04305896908044815, 'diff_eval': 936.2785415658591} step=35000
2025-12-06 17:35.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_35000.d3


Epoch 36/200: 100%|██████████| 1000/1000 [00:09<00:00, 106.42it/s, vae_loss=0.0213]


2025-12-06 17:35.54 [info     ] PLASWithPerturbation_20251206172719: epoch=36 step=36000 epoch=36 metrics={'time_sample_batch': 0.001868710994720459, 'time_algorithm_update': 0.007136181354522705, 'vae_loss': 0.021325974303297697, 'time_step': 0.009224232912063599, 'td_error': 0.45971635836167707, 'value_scale': -0.048475999987784894, 'discounted_advantage': 0.0021447021703494776, 'initial_state': -0.04306258261203766, 'diff_eval': 865.0143452710774} step=36000
2025-12-06 17:35.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_36000.d3


Epoch 37/200: 100%|██████████| 1000/1000 [00:10<00:00, 99.20it/s, vae_loss=0.0209] 


2025-12-06 17:36.09 [info     ] PLASWithPerturbation_20251206172719: epoch=37 step=37000 epoch=37 metrics={'time_sample_batch': 0.0019876818656921385, 'time_algorithm_update': 0.0076610522270202635, 'vae_loss': 0.020869277726858853, 'time_step': 0.009883995532989503, 'td_error': 0.46027824771027864, 'value_scale': -0.047929692990009966, 'discounted_advantage': -0.00029462834966930616, 'initial_state': -0.042154569178819656, 'diff_eval': 866.644453926161} step=37000
2025-12-06 17:36.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_37000.d3


Epoch 38/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.64it/s, vae_loss=0.0209]


2025-12-06 17:36.23 [info     ] PLASWithPerturbation_20251206172719: epoch=38 step=38000 epoch=38 metrics={'time_sample_batch': 0.001964181423187256, 'time_algorithm_update': 0.007464837551116943, 'vae_loss': 0.02086076530907303, 'time_step': 0.009657879829406739, 'td_error': 0.45985854754350214, 'value_scale': -0.04828428076103967, 'discounted_advantage': 0.0017344243038342941, 'initial_state': -0.04239480197429657, 'diff_eval': 837.1048814948025} step=38000
2025-12-06 17:36.23 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_38000.d3


Epoch 39/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.48it/s, vae_loss=0.0203]


2025-12-06 17:36.37 [info     ] PLASWithPerturbation_20251206172719: epoch=39 step=39000 epoch=39 metrics={'time_sample_batch': 0.00191011118888855, 'time_algorithm_update': 0.007339632987976074, 'vae_loss': 0.02027244276087731, 'time_step': 0.00948304295539856, 'td_error': 0.45915470051231777, 'value_scale': -0.04903841313218256, 'discounted_advantage': 0.006399805393020892, 'initial_state': -0.04308461770415306, 'diff_eval': 899.4093920378282} step=39000
2025-12-06 17:36.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_39000.d3


Epoch 40/200: 100%|██████████| 1000/1000 [00:09<00:00, 104.01it/s, vae_loss=0.0207]


2025-12-06 17:36.51 [info     ] PLASWithPerturbation_20251206172719: epoch=40 step=40000 epoch=40 metrics={'time_sample_batch': 0.001901097297668457, 'time_algorithm_update': 0.007312882661819458, 'vae_loss': 0.020661892952397464, 'time_step': 0.00943622088432312, 'td_error': 0.45855735158098065, 'value_scale': -0.049537967490414836, 'discounted_advantage': 0.008877262889520574, 'initial_state': -0.04332974925637245, 'diff_eval': 914.4543401321149} step=40000
2025-12-06 17:36.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_40000.d3


Epoch 41/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.93it/s, vae_loss=0.021]


2025-12-06 17:37.05 [info     ] PLASWithPerturbation_20251206172719: epoch=41 step=41000 epoch=41 metrics={'time_sample_batch': 0.0018866920471191405, 'time_algorithm_update': 0.007408949851989746, 'vae_loss': 0.020987174949608743, 'time_step': 0.009518176078796388, 'td_error': 0.4604154261284322, 'value_scale': -0.04787043438270401, 'discounted_advantage': -0.0015314831338607289, 'initial_state': -0.04215642064809799, 'diff_eval': 882.5722007943584} step=41000
2025-12-06 17:37.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_41000.d3


Epoch 42/200: 100%|██████████| 1000/1000 [00:09<00:00, 104.95it/s, vae_loss=0.0202]


2025-12-06 17:37.19 [info     ] PLASWithPerturbation_20251206172719: epoch=42 step=42000 epoch=42 metrics={'time_sample_batch': 0.00185311222076416, 'time_algorithm_update': 0.0072804193496704105, 'vae_loss': 0.02017533521912992, 'time_step': 0.009354192972183227, 'td_error': 0.4582508395712389, 'value_scale': -0.04984633198979077, 'discounted_advantage': 0.010806333264399831, 'initial_state': -0.043304670602083206, 'diff_eval': 971.5855002208915} step=42000
2025-12-06 17:37.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_42000.d3


Epoch 43/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.28it/s, vae_loss=0.0202]


2025-12-06 17:37.33 [info     ] PLASWithPerturbation_20251206172719: epoch=43 step=43000 epoch=43 metrics={'time_sample_batch': 0.0019326441287994384, 'time_algorithm_update': 0.007442041635513306, 'vae_loss': 0.020253319296985865, 'time_step': 0.00959187364578247, 'td_error': 0.4605488133354348, 'value_scale': -0.047627204763316625, 'discounted_advantage': -0.003037103063827678, 'initial_state': -0.04220607876777649, 'diff_eval': 825.6784308340215} step=43000
2025-12-06 17:37.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_43000.d3


Epoch 44/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.69it/s, vae_loss=0.0203]


2025-12-06 17:37.48 [info     ] PLASWithPerturbation_20251206172719: epoch=44 step=44000 epoch=44 metrics={'time_sample_batch': 0.0019133114814758301, 'time_algorithm_update': 0.007406723976135254, 'vae_loss': 0.020299520399421452, 'time_step': 0.009546927452087402, 'td_error': 0.4608963435822663, 'value_scale': -0.047421385227700216, 'discounted_advantage': -0.00447285569285863, 'initial_state': -0.04224809259176254, 'diff_eval': 849.9676946071229} step=44000
2025-12-06 17:37.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_44000.d3


Epoch 45/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.48it/s, vae_loss=0.0197]


2025-12-06 17:38.02 [info     ] PLASWithPerturbation_20251206172719: epoch=45 step=45000 epoch=45 metrics={'time_sample_batch': 0.001930685520172119, 'time_algorithm_update': 0.007318723440170288, 'vae_loss': 0.019663354406133295, 'time_step': 0.009476666450500488, 'td_error': 0.4595422215015479, 'value_scale': -0.048660915444487175, 'discounted_advantage': 0.0033033615586715998, 'initial_state': -0.04267222434282303, 'diff_eval': 815.9472459776977} step=45000
2025-12-06 17:38.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_45000.d3


Epoch 46/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.73it/s, vae_loss=0.0197]


2025-12-06 17:38.16 [info     ] PLASWithPerturbation_20251206172719: epoch=46 step=46000 epoch=46 metrics={'time_sample_batch': 0.0019390413761138916, 'time_algorithm_update': 0.007466860294342041, 'vae_loss': 0.01973755241557956, 'time_step': 0.009635678768157959, 'td_error': 0.4595291519634384, 'value_scale': -0.048684194523994154, 'discounted_advantage': 0.0040085345651633296, 'initial_state': -0.04293489828705788, 'diff_eval': 807.2851021283087} step=46000
2025-12-06 17:38.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_46000.d3


Epoch 47/200: 100%|██████████| 1000/1000 [00:09<00:00, 100.16it/s, vae_loss=0.0192]


2025-12-06 17:38.30 [info     ] PLASWithPerturbation_20251206172719: epoch=47 step=47000 epoch=47 metrics={'time_sample_batch': 0.0019266648292541504, 'time_algorithm_update': 0.007629244089126587, 'vae_loss': 0.01925531111843884, 'time_step': 0.009787887811660767, 'td_error': 0.4587480730140325, 'value_scale': -0.04943113067060122, 'discounted_advantage': 0.00912619485829059, 'initial_state': -0.04339909180998802, 'diff_eval': 891.8751521850733} step=47000
2025-12-06 17:38.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_47000.d3


Epoch 48/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.61it/s, vae_loss=0.0195]


2025-12-06 17:38.44 [info     ] PLASWithPerturbation_20251206172719: epoch=48 step=48000 epoch=48 metrics={'time_sample_batch': 0.0019280321598052978, 'time_algorithm_update': 0.007480709552764892, 'vae_loss': 0.019497766891960054, 'time_step': 0.009648185014724732, 'td_error': 0.4596605805201465, 'value_scale': -0.04851939648972186, 'discounted_advantage': 0.0023048878520074167, 'initial_state': -0.042878732085227966, 'diff_eval': 779.1232778433688} step=48000
2025-12-06 17:38.44 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_48000.d3


Epoch 49/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.33it/s, vae_loss=0.0192]


2025-12-06 17:38.58 [info     ] PLASWithPerturbation_20251206172719: epoch=49 step=49000 epoch=49 metrics={'time_sample_batch': 0.0019137971401214599, 'time_algorithm_update': 0.0074923143386840824, 'vae_loss': 0.019198537333868445, 'time_step': 0.009666401863098145, 'td_error': 0.459806558525171, 'value_scale': -0.04841591866393413, 'discounted_advantage': 0.00034239929163375416, 'initial_state': -0.04283518344163895, 'diff_eval': 785.18318086753} step=49000
2025-12-06 17:38.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_49000.d3


Epoch 50/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.64it/s, vae_loss=0.0188]


2025-12-06 17:39.13 [info     ] PLASWithPerturbation_20251206172719: epoch=50 step=50000 epoch=50 metrics={'time_sample_batch': 0.001915701389312744, 'time_algorithm_update': 0.007392853975296021, 'vae_loss': 0.018792674202471972, 'time_step': 0.009552742958068848, 'td_error': 0.4594050816835527, 'value_scale': -0.04880809274557525, 'discounted_advantage': 0.0035614911580680907, 'initial_state': -0.042926300317049026, 'diff_eval': 775.2172928482678} step=50000
2025-12-06 17:39.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_50000.d3


Epoch 51/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.78it/s, vae_loss=0.0187]


2025-12-06 17:39.27 [info     ] PLASWithPerturbation_20251206172719: epoch=51 step=51000 epoch=51 metrics={'time_sample_batch': 0.0018649744987487794, 'time_algorithm_update': 0.007345325231552124, 'vae_loss': 0.018700068697333337, 'time_step': 0.009445190906524658, 'td_error': 0.4591623348633006, 'value_scale': -0.04903437661854839, 'discounted_advantage': 0.006972850535468347, 'initial_state': -0.04294586181640625, 'diff_eval': 825.7198078735745} step=51000
2025-12-06 17:39.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_51000.d3


Epoch 52/200: 100%|██████████| 1000/1000 [00:10<00:00, 99.53it/s, vae_loss=0.0189]


2025-12-06 17:39.41 [info     ] PLASWithPerturbation_20251206172719: epoch=52 step=52000 epoch=52 metrics={'time_sample_batch': 0.001992976427078247, 'time_algorithm_update': 0.0076404399871826175, 'vae_loss': 0.018838757551740856, 'time_step': 0.009859786748886108, 'td_error': 0.4587492017003798, 'value_scale': -0.04939221915002828, 'discounted_advantage': 0.008350241554813906, 'initial_state': -0.04302535206079483, 'diff_eval': 824.2675874503767} step=52000
2025-12-06 17:39.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_52000.d3


Epoch 53/200: 100%|██████████| 1000/1000 [00:09<00:00, 100.81it/s, vae_loss=0.0187]


2025-12-06 17:39.55 [info     ] PLASWithPerturbation_20251206172719: epoch=53 step=53000 epoch=53 metrics={'time_sample_batch': 0.001983973741531372, 'time_algorithm_update': 0.007525935411453247, 'vae_loss': 0.01869176282081753, 'time_step': 0.00973686408996582, 'td_error': 0.459161145969376, 'value_scale': -0.04899187227640822, 'discounted_advantage': 0.005925051605853129, 'initial_state': -0.04288904741406441, 'diff_eval': 745.0064621955664} step=53000
2025-12-06 17:39.55 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_53000.d3


Epoch 54/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.28it/s, vae_loss=0.0187]


2025-12-06 17:40.09 [info     ] PLASWithPerturbation_20251206172719: epoch=54 step=54000 epoch=54 metrics={'time_sample_batch': 0.0018442697525024414, 'time_algorithm_update': 0.007396622657775879, 'vae_loss': 0.01869884526822716, 'time_step': 0.009488455533981323, 'td_error': 0.4588435430529825, 'value_scale': -0.04924696298668978, 'discounted_advantage': 0.006746209507630368, 'initial_state': -0.04299480468034744, 'diff_eval': 776.9310190415279} step=54000
2025-12-06 17:40.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_54000.d3


Epoch 55/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.72it/s, vae_loss=0.0187]


2025-12-06 17:40.24 [info     ] PLASWithPerturbation_20251206172719: epoch=55 step=55000 epoch=55 metrics={'time_sample_batch': 0.001922950267791748, 'time_algorithm_update': 0.007455607652664184, 'vae_loss': 0.018736040753312408, 'time_step': 0.009633545398712157, 'td_error': 0.45954598968867016, 'value_scale': -0.048588478423683135, 'discounted_advantage': 0.002821542941042685, 'initial_state': -0.04281897097826004, 'diff_eval': 721.7744256507538} step=55000
2025-12-06 17:40.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_55000.d3


Epoch 56/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.13it/s, vae_loss=0.0181]


2025-12-06 17:40.38 [info     ] PLASWithPerturbation_20251206172719: epoch=56 step=56000 epoch=56 metrics={'time_sample_batch': 0.0018749363422393799, 'time_algorithm_update': 0.007383138179779053, 'vae_loss': 0.01810594874806702, 'time_step': 0.009508567571640014, 'td_error': 0.4593502797960073, 'value_scale': -0.04881065675117581, 'discounted_advantage': 0.003086699129524345, 'initial_state': -0.04317497834563255, 'diff_eval': 748.2393716715569} step=56000
2025-12-06 17:40.38 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_56000.d3


Epoch 57/200: 100%|██████████| 1000/1000 [00:10<00:00, 98.77it/s, vae_loss=0.0179]


2025-12-06 17:40.52 [info     ] PLASWithPerturbation_20251206172719: epoch=57 step=57000 epoch=57 metrics={'time_sample_batch': 0.0019978456497192383, 'time_algorithm_update': 0.007713082790374756, 'vae_loss': 0.017904046992771327, 'time_step': 0.009933596849441529, 'td_error': 0.45965064591009747, 'value_scale': -0.04854223692371242, 'discounted_advantage': 0.003954383296904832, 'initial_state': -0.042631473392248154, 'diff_eval': 732.5709982186265} step=57000
2025-12-06 17:40.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_57000.d3


Epoch 58/200: 100%|██████████| 1000/1000 [00:09<00:00, 105.68it/s, vae_loss=0.0185]


2025-12-06 17:41.06 [info     ] PLASWithPerturbation_20251206172719: epoch=58 step=58000 epoch=58 metrics={'time_sample_batch': 0.0018353564739227294, 'time_algorithm_update': 0.007222676038742065, 'vae_loss': 0.018571084841154516, 'time_step': 0.00928831696510315, 'td_error': 0.45951921047090893, 'value_scale': -0.04865884767261342, 'discounted_advantage': 0.0029824521109513918, 'initial_state': -0.0431109182536602, 'diff_eval': 696.3380538872539} step=58000
2025-12-06 17:41.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_58000.d3


Epoch 59/200: 100%|██████████| 1000/1000 [00:10<00:00, 98.00it/s, vae_loss=0.0183]


2025-12-06 17:41.21 [info     ] PLASWithPerturbation_20251206172719: epoch=59 step=59000 epoch=59 metrics={'time_sample_batch': 0.0018352930545806885, 'time_algorithm_update': 0.007948392152786254, 'vae_loss': 0.018262461490463466, 'time_step': 0.010020336389541626, 'td_error': 0.45921625924372417, 'value_scale': -0.04895776641857244, 'discounted_advantage': 0.0052363443495839586, 'initial_state': -0.042795371264219284, 'diff_eval': 764.8558193077322} step=59000
2025-12-06 17:41.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_59000.d3


Epoch 60/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.88it/s, vae_loss=0.0178]


2025-12-06 17:41.35 [info     ] PLASWithPerturbation_20251206172719: epoch=60 step=60000 epoch=60 metrics={'time_sample_batch': 0.0019097790718078614, 'time_algorithm_update': 0.007496279716491699, 'vae_loss': 0.017840270642656832, 'time_step': 0.009632211685180664, 'td_error': 0.4592368167099406, 'value_scale': -0.04893533775445514, 'discounted_advantage': 0.00630896513710344, 'initial_state': -0.042620811611413956, 'diff_eval': 765.2469855878913} step=60000
2025-12-06 17:41.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_60000.d3


Epoch 61/200: 100%|██████████| 1000/1000 [00:10<00:00, 99.28it/s, vae_loss=0.0178]


2025-12-06 17:41.50 [info     ] PLASWithPerturbation_20251206172719: epoch=61 step=61000 epoch=61 metrics={'time_sample_batch': 0.0019808378219604494, 'time_algorithm_update': 0.007654303073883056, 'vae_loss': 0.01783379027992487, 'time_step': 0.009885891675949096, 'td_error': 0.45966173255358345, 'value_scale': -0.04847548810580912, 'discounted_advantage': 0.0014327286104827278, 'initial_state': -0.04284156486392021, 'diff_eval': 678.7321941890019} step=61000
2025-12-06 17:41.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_61000.d3


Epoch 62/200: 100%|██████████| 1000/1000 [00:09<00:00, 100.67it/s, vae_loss=0.0176]


2025-12-06 17:42.04 [info     ] PLASWithPerturbation_20251206172719: epoch=62 step=62000 epoch=62 metrics={'time_sample_batch': 0.0019173712730407714, 'time_algorithm_update': 0.007572690725326538, 'vae_loss': 0.017629921792075037, 'time_step': 0.009736896991729737, 'td_error': 0.45969723457449857, 'value_scale': -0.04852474287264705, 'discounted_advantage': 0.0016153070889437285, 'initial_state': -0.042930010706186295, 'diff_eval': 682.5915607660864} step=62000
2025-12-06 17:42.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_62000.d3


Epoch 63/200: 100%|██████████| 1000/1000 [00:09<00:00, 104.35it/s, vae_loss=0.0175]


2025-12-06 17:42.18 [info     ] PLASWithPerturbation_20251206172719: epoch=63 step=63000 epoch=63 metrics={'time_sample_batch': 0.0018886313438415527, 'time_algorithm_update': 0.007266406059265137, 'vae_loss': 0.017541254180949182, 'time_step': 0.009397304773330688, 'td_error': 0.4593702089827662, 'value_scale': -0.0487504745181728, 'discounted_advantage': 0.003926691548123603, 'initial_state': -0.04293351247906685, 'diff_eval': 668.8132413893098} step=63000
2025-12-06 17:42.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_63000.d3


Epoch 64/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.74it/s, vae_loss=0.0173]


2025-12-06 17:42.32 [info     ] PLASWithPerturbation_20251206172719: epoch=64 step=64000 epoch=64 metrics={'time_sample_batch': 0.0018819127082824708, 'time_algorithm_update': 0.007350644588470459, 'vae_loss': 0.017330767040140926, 'time_step': 0.009462240934371948, 'td_error': 0.45909562443435625, 'value_scale': -0.049020496424215736, 'discounted_advantage': 0.007180018833836832, 'initial_state': -0.04277242347598076, 'diff_eval': 712.1182080660424} step=64000
2025-12-06 17:42.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_64000.d3


Epoch 65/200: 100%|██████████| 1000/1000 [00:09<00:00, 104.16it/s, vae_loss=0.0173]


2025-12-06 17:42.46 [info     ] PLASWithPerturbation_20251206172719: epoch=65 step=65000 epoch=65 metrics={'time_sample_batch': 0.0018847646713256836, 'time_algorithm_update': 0.007301323652267456, 'vae_loss': 0.017279359579551967, 'time_step': 0.00942025899887085, 'td_error': 0.45846417092527325, 'value_scale': -0.049621383956971535, 'discounted_advantage': 0.008503037684520036, 'initial_state': -0.04363904520869255, 'diff_eval': 773.5988395999549} step=65000
2025-12-06 17:42.46 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_65000.d3


Epoch 66/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.10it/s, vae_loss=0.0173]


2025-12-06 17:43.01 [info     ] PLASWithPerturbation_20251206172719: epoch=66 step=66000 epoch=66 metrics={'time_sample_batch': 0.00188811993598938, 'time_algorithm_update': 0.007478538990020752, 'vae_loss': 0.017270085962023587, 'time_step': 0.009594757318496704, 'td_error': 0.45878957548404525, 'value_scale': -0.04925918465828381, 'discounted_advantage': 0.008642929749480808, 'initial_state': -0.04241587594151497, 'diff_eval': 748.5638167924822} step=66000
2025-12-06 17:43.01 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_66000.d3


Epoch 67/200: 100%|██████████| 1000/1000 [00:10<00:00, 99.75it/s, vae_loss=0.0166]


2025-12-06 17:43.15 [info     ] PLASWithPerturbation_20251206172719: epoch=67 step=67000 epoch=67 metrics={'time_sample_batch': 0.0019658336639404297, 'time_algorithm_update': 0.007623449087142944, 'vae_loss': 0.016649606759194285, 'time_step': 0.009826898097991943, 'td_error': 0.45990233629230604, 'value_scale': -0.04824371126340663, 'discounted_advantage': 0.0012451627224098435, 'initial_state': -0.04242389649152756, 'diff_eval': 666.2585371152559} step=67000
2025-12-06 17:43.15 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_67000.d3


Epoch 68/200: 100%|██████████| 1000/1000 [00:10<00:00, 99.82it/s, vae_loss=0.017] 


2025-12-06 17:43.30 [info     ] PLASWithPerturbation_20251206172719: epoch=68 step=68000 epoch=68 metrics={'time_sample_batch': 0.0019566168785095216, 'time_algorithm_update': 0.007604221105575562, 'vae_loss': 0.0170303961224854, 'time_step': 0.009811814785003661, 'td_error': 0.4590869911689165, 'value_scale': -0.04910044185895307, 'discounted_advantage': 0.007736191341183375, 'initial_state': -0.042837437242269516, 'diff_eval': 699.8383311515819} step=68000
2025-12-06 17:43.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_68000.d3


Epoch 69/200: 100%|██████████| 1000/1000 [00:10<00:00, 95.39it/s, vae_loss=0.0167]


2025-12-06 17:43.45 [info     ] PLASWithPerturbation_20251206172719: epoch=69 step=69000 epoch=69 metrics={'time_sample_batch': 0.0021284151077270506, 'time_algorithm_update': 0.007885982036590576, 'vae_loss': 0.01671264102542773, 'time_step': 0.010263333082199096, 'td_error': 0.4586074999194215, 'value_scale': -0.04944635294760325, 'discounted_advantage': 0.00877417419646655, 'initial_state': -0.04331577196717262, 'diff_eval': 703.3783012225997} step=69000
2025-12-06 17:43.45 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_69000.d3


Epoch 70/200: 100%|██████████| 1000/1000 [00:10<00:00, 98.12it/s, vae_loss=0.0165]


2025-12-06 17:43.59 [info     ] PLASWithPerturbation_20251206172719: epoch=70 step=70000 epoch=70 metrics={'time_sample_batch': 0.0020326666831970213, 'time_algorithm_update': 0.007697035789489746, 'vae_loss': 0.01656140579096973, 'time_step': 0.009976199150085449, 'td_error': 0.4593059188426568, 'value_scale': -0.04882255932027341, 'discounted_advantage': 0.005525579631393519, 'initial_state': -0.04264063015580177, 'diff_eval': 660.5729711863607} step=70000
2025-12-06 17:43.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_70000.d3


Epoch 71/200: 100%|██████████| 1000/1000 [00:09<00:00, 100.75it/s, vae_loss=0.0168]


2025-12-06 17:44.14 [info     ] PLASWithPerturbation_20251206172719: epoch=71 step=71000 epoch=71 metrics={'time_sample_batch': 0.00197688627243042, 'time_algorithm_update': 0.0075214216709136965, 'vae_loss': 0.016831478120293467, 'time_step': 0.00974197244644165, 'td_error': 0.46094125004586184, 'value_scale': -0.04730730901385618, 'discounted_advantage': -0.005218571231536109, 'initial_state': -0.042195118963718414, 'diff_eval': 670.7052422456076} step=71000
2025-12-06 17:44.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_71000.d3


Epoch 72/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.77it/s, vae_loss=0.0167]


2025-12-06 17:44.28 [info     ] PLASWithPerturbation_20251206172719: epoch=72 step=72000 epoch=72 metrics={'time_sample_batch': 0.001904630184173584, 'time_algorithm_update': 0.007392385005950928, 'vae_loss': 0.016729610458482057, 'time_step': 0.00953469729423523, 'td_error': 0.45901461249890113, 'value_scale': -0.049024929073135694, 'discounted_advantage': 0.006561819919702754, 'initial_state': -0.04273577779531479, 'diff_eval': 666.3957265655582} step=72000
2025-12-06 17:44.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_72000.d3


Epoch 73/200: 100%|██████████| 1000/1000 [00:09<00:00, 104.30it/s, vae_loss=0.0163]


2025-12-06 17:44.42 [info     ] PLASWithPerturbation_20251206172719: epoch=73 step=73000 epoch=73 metrics={'time_sample_batch': 0.00187723708152771, 'time_algorithm_update': 0.007297599554061889, 'vae_loss': 0.01625735119963065, 'time_step': 0.009402231216430664, 'td_error': 0.45945737446950274, 'value_scale': -0.04863158409965396, 'discounted_advantage': 0.0040819171230254865, 'initial_state': -0.04256103187799454, 'diff_eval': 615.1137730822996} step=73000
2025-12-06 17:44.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_73000.d3


Epoch 74/200: 100%|██████████| 1000/1000 [00:10<00:00, 99.98it/s, vae_loss=0.0162]


2025-12-06 17:44.56 [info     ] PLASWithPerturbation_20251206172719: epoch=74 step=74000 epoch=74 metrics={'time_sample_batch': 0.0019582552909851074, 'time_algorithm_update': 0.007618403196334839, 'vae_loss': 0.016232512360904366, 'time_step': 0.009813124656677247, 'td_error': 0.45951385038504655, 'value_scale': -0.04862001765837888, 'discounted_advantage': 0.0024891993788422047, 'initial_state': -0.0428326241672039, 'diff_eval': 628.7415565078213} step=74000
2025-12-06 17:44.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_74000.d3


Epoch 75/200: 100%|██████████| 1000/1000 [00:09<00:00, 104.03it/s, vae_loss=0.0165]


2025-12-06 17:45.10 [info     ] PLASWithPerturbation_20251206172719: epoch=75 step=75000 epoch=75 metrics={'time_sample_batch': 0.0018880033493041991, 'time_algorithm_update': 0.007307173252105713, 'vae_loss': 0.016482749143615364, 'time_step': 0.009418296575546265, 'td_error': 0.45849353836905843, 'value_scale': -0.04957072483687931, 'discounted_advantage': 0.008969114779315453, 'initial_state': -0.043144091963768005, 'diff_eval': 700.4198503769841} step=75000
2025-12-06 17:45.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_75000.d3


Epoch 76/200: 100%|██████████| 1000/1000 [00:10<00:00, 99.64it/s, vae_loss=0.0163]


2025-12-06 17:45.24 [info     ] PLASWithPerturbation_20251206172719: epoch=76 step=76000 epoch=76 metrics={'time_sample_batch': 0.0020191545486450196, 'time_algorithm_update': 0.007596561908721924, 'vae_loss': 0.016258083187509327, 'time_step': 0.009841779708862304, 'td_error': 0.4598212269529357, 'value_scale': -0.04834463739649011, 'discounted_advantage': 0.0018855098625938861, 'initial_state': -0.04242958873510361, 'diff_eval': 637.5897041363835} step=76000
2025-12-06 17:45.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_76000.d3


Epoch 77/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.28it/s, vae_loss=0.0161]


2025-12-06 17:45.39 [info     ] PLASWithPerturbation_20251206172719: epoch=77 step=77000 epoch=77 metrics={'time_sample_batch': 0.001970311403274536, 'time_algorithm_update': 0.007493867874145508, 'vae_loss': 0.016116597469896077, 'time_step': 0.009692545652389526, 'td_error': 0.4596796078527503, 'value_scale': -0.048427822205729075, 'discounted_advantage': 0.0032761084418820416, 'initial_state': -0.042368240654468536, 'diff_eval': 628.9996902518524} step=77000
2025-12-06 17:45.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_77000.d3


Epoch 78/200: 100%|██████████| 1000/1000 [00:09<00:00, 105.08it/s, vae_loss=0.0158]


2025-12-06 17:45.53 [info     ] PLASWithPerturbation_20251206172719: epoch=78 step=78000 epoch=78 metrics={'time_sample_batch': 0.001807076930999756, 'time_algorithm_update': 0.007256218671798706, 'vae_loss': 0.015822057797573508, 'time_step': 0.009315541982650756, 'td_error': 0.4592559865989761, 'value_scale': -0.04880077917361437, 'discounted_advantage': 0.004184968232798569, 'initial_state': -0.042671456933021545, 'diff_eval': 611.8679677340696} step=78000
2025-12-06 17:45.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_78000.d3


Epoch 79/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.82it/s, vae_loss=0.0157]


2025-12-06 17:46.07 [info     ] PLASWithPerturbation_20251206172719: epoch=79 step=79000 epoch=79 metrics={'time_sample_batch': 0.0019178574085235596, 'time_algorithm_update': 0.00748143219947815, 'vae_loss': 0.015739113909192384, 'time_step': 0.009632582187652589, 'td_error': 0.45963877525604263, 'value_scale': -0.04852406263596566, 'discounted_advantage': 0.002918160260074329, 'initial_state': -0.042927760630846024, 'diff_eval': 576.5792769823303} step=79000
2025-12-06 17:46.07 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_79000.d3


Epoch 80/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.89it/s, vae_loss=0.0156]


2025-12-06 17:46.21 [info     ] PLASWithPerturbation_20251206172719: epoch=80 step=80000 epoch=80 metrics={'time_sample_batch': 0.0018878276348114013, 'time_algorithm_update': 0.0073179550170898435, 'vae_loss': 0.015593899115454406, 'time_step': 0.009436201333999633, 'td_error': 0.4579573622657157, 'value_scale': -0.05004797831539936, 'discounted_advantage': 0.01374062633770714, 'initial_state': -0.04329628869891167, 'diff_eval': 773.5209904282348} step=80000
2025-12-06 17:46.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_80000.d3


Epoch 81/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.39it/s, vae_loss=0.016]


2025-12-06 17:46.35 [info     ] PLASWithPerturbation_20251206172719: epoch=81 step=81000 epoch=81 metrics={'time_sample_batch': 0.001935938835144043, 'time_algorithm_update': 0.007418745756149292, 'vae_loss': 0.01598597361333668, 'time_step': 0.009587435245513917, 'td_error': 0.4596783244570524, 'value_scale': -0.04847463665956324, 'discounted_advantage': 0.002619685667511395, 'initial_state': -0.042537614703178406, 'diff_eval': 622.4362639070805} step=81000
2025-12-06 17:46.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_81000.d3


Epoch 82/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.71it/s, vae_loss=0.0156]


2025-12-06 17:46.49 [info     ] PLASWithPerturbation_20251206172719: epoch=82 step=82000 epoch=82 metrics={'time_sample_batch': 0.0019105536937713624, 'time_algorithm_update': 0.007410727739334107, 'vae_loss': 0.015619659090880304, 'time_step': 0.009556210041046143, 'td_error': 0.4588685830826641, 'value_scale': -0.049193196900326315, 'discounted_advantage': 0.007633845483093615, 'initial_state': -0.042906902730464935, 'diff_eval': 607.7708377506701} step=82000
2025-12-06 17:46.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_82000.d3


Epoch 83/200: 100%|██████████| 1000/1000 [00:10<00:00, 99.65it/s, vae_loss=0.0156]


2025-12-06 17:47.03 [info     ] PLASWithPerturbation_20251206172719: epoch=83 step=83000 epoch=83 metrics={'time_sample_batch': 0.001924485206604004, 'time_algorithm_update': 0.007694958925247192, 'vae_loss': 0.015547826576977968, 'time_step': 0.00984562349319458, 'td_error': 0.4595684293194675, 'value_scale': -0.04853892733935464, 'discounted_advantage': 0.0021311989110898944, 'initial_state': -0.04279785975813866, 'diff_eval': 588.1190993254429} step=83000
2025-12-06 17:47.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_83000.d3


Epoch 84/200: 100%|██████████| 1000/1000 [00:09<00:00, 106.14it/s, vae_loss=0.0154]


2025-12-06 17:47.17 [info     ] PLASWithPerturbation_20251206172719: epoch=84 step=84000 epoch=84 metrics={'time_sample_batch': 0.0018493435382843017, 'time_algorithm_update': 0.007178622722625733, 'vae_loss': 0.015455517045687884, 'time_step': 0.009248447179794311, 'td_error': 0.4588619801625636, 'value_scale': -0.0491621171819844, 'discounted_advantage': 0.006219970277450698, 'initial_state': -0.0431777760386467, 'diff_eval': 612.4948238480926} step=84000
2025-12-06 17:47.17 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_84000.d3


Epoch 85/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.61it/s, vae_loss=0.0153]


2025-12-06 17:47.31 [info     ] PLASWithPerturbation_20251206172719: epoch=85 step=85000 epoch=85 metrics={'time_sample_batch': 0.0018745787143707275, 'time_algorithm_update': 0.007368114709854126, 'vae_loss': 0.015319388495292515, 'time_step': 0.009472524881362914, 'td_error': 0.45977442274197955, 'value_scale': -0.04832197399770924, 'discounted_advantage': 0.0017871041245887977, 'initial_state': -0.04275187477469444, 'diff_eval': 570.7339586711925} step=85000
2025-12-06 17:47.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_85000.d3


Epoch 86/200: 100%|██████████| 1000/1000 [00:09<00:00, 106.43it/s, vae_loss=0.0153]


2025-12-06 17:47.45 [info     ] PLASWithPerturbation_20251206172719: epoch=86 step=86000 epoch=86 metrics={'time_sample_batch': 0.0018269734382629394, 'time_algorithm_update': 0.007168222188949585, 'vae_loss': 0.015308479441795498, 'time_step': 0.009219736099243163, 'td_error': 0.45903009599964334, 'value_scale': -0.04907650109232159, 'discounted_advantage': 0.004941339574340305, 'initial_state': -0.0430830679833889, 'diff_eval': 597.6810729570881} step=86000
2025-12-06 17:47.45 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_86000.d3


Epoch 87/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.92it/s, vae_loss=0.0153]


2025-12-06 17:47.59 [info     ] PLASWithPerturbation_20251206172719: epoch=87 step=87000 epoch=87 metrics={'time_sample_batch': 0.0018549952507019042, 'time_algorithm_update': 0.007336602687835693, 'vae_loss': 0.015295444414019584, 'time_step': 0.009430099725723267, 'td_error': 0.4586940630732784, 'value_scale': -0.0493565326679907, 'discounted_advantage': 0.009417049844037323, 'initial_state': -0.04286216199398041, 'diff_eval': 630.6926786983089} step=87000
2025-12-06 17:47.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_87000.d3


Epoch 88/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.60it/s, vae_loss=0.0149]


2025-12-06 17:48.13 [info     ] PLASWithPerturbation_20251206172719: epoch=88 step=88000 epoch=88 metrics={'time_sample_batch': 0.001903317928314209, 'time_algorithm_update': 0.0075186586380004885, 'vae_loss': 0.01492869844008237, 'time_step': 0.009662473201751709, 'td_error': 0.45914705970236175, 'value_scale': -0.04892527940585485, 'discounted_advantage': 0.005382817003554718, 'initial_state': -0.042780693620443344, 'diff_eval': 557.5541082530982} step=88000
2025-12-06 17:48.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_88000.d3


Epoch 89/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.60it/s, vae_loss=0.0149]


2025-12-06 17:48.27 [info     ] PLASWithPerturbation_20251206172719: epoch=89 step=89000 epoch=89 metrics={'time_sample_batch': 0.0019763264656066895, 'time_algorithm_update': 0.007439810752868653, 'vae_loss': 0.014874706217553466, 'time_step': 0.009653469562530518, 'td_error': 0.45899617448245206, 'value_scale': -0.049117316877545926, 'discounted_advantage': 0.006426329280401307, 'initial_state': -0.04298366233706474, 'diff_eval': 594.9517068525436} step=89000
2025-12-06 17:48.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_89000.d3


Epoch 90/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.30it/s, vae_loss=0.015]


2025-12-06 17:48.42 [info     ] PLASWithPerturbation_20251206172719: epoch=90 step=90000 epoch=90 metrics={'time_sample_batch': 0.0019280226230621337, 'time_algorithm_update': 0.007424257278442383, 'vae_loss': 0.015023216388188303, 'time_step': 0.009589514732360839, 'td_error': 0.4600305273278676, 'value_scale': -0.04811927578306608, 'discounted_advantage': 0.00011834120929286511, 'initial_state': -0.04278741776943207, 'diff_eval': 547.066629258525} step=90000
2025-12-06 17:48.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_90000.d3


Epoch 91/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.53it/s, vae_loss=0.0151]


2025-12-06 17:48.55 [info     ] PLASWithPerturbation_20251206172719: epoch=91 step=91000 epoch=91 metrics={'time_sample_batch': 0.0019022772312164307, 'time_algorithm_update': 0.007352550745010376, 'vae_loss': 0.01512544016353786, 'time_step': 0.009476160287857055, 'td_error': 0.4586005032580384, 'value_scale': -0.0494309355370538, 'discounted_advantage': 0.00905052595969746, 'initial_state': -0.04296613857150078, 'diff_eval': 627.0774877783202} step=91000
2025-12-06 17:48.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_91000.d3


Epoch 92/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.03it/s, vae_loss=0.0149]


2025-12-06 17:49.10 [info     ] PLASWithPerturbation_20251206172719: epoch=92 step=92000 epoch=92 metrics={'time_sample_batch': 0.0019061379432678222, 'time_algorithm_update': 0.007393622636795044, 'vae_loss': 0.014914259484503417, 'time_step': 0.009521914958953857, 'td_error': 0.4600094082108492, 'value_scale': -0.04812345756233451, 'discounted_advantage': -0.0001899960345257145, 'initial_state': -0.04276445508003235, 'diff_eval': 532.9380048046795} step=92000
2025-12-06 17:49.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_92000.d3


Epoch 93/200: 100%|██████████| 1000/1000 [00:10<00:00, 98.23it/s, vae_loss=0.0147]


2025-12-06 17:49.24 [info     ] PLASWithPerturbation_20251206172719: epoch=93 step=93000 epoch=93 metrics={'time_sample_batch': 0.0020314717292785646, 'time_algorithm_update': 0.007730926275253296, 'vae_loss': 0.014642971196677537, 'time_step': 0.009994299411773681, 'td_error': 0.4586891050137417, 'value_scale': -0.04935024853691159, 'discounted_advantage': 0.008004444500911586, 'initial_state': -0.04334821179509163, 'diff_eval': 608.1807842676136} step=93000
2025-12-06 17:49.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_93000.d3


Epoch 94/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.31it/s, vae_loss=0.0145]


2025-12-06 17:49.38 [info     ] PLASWithPerturbation_20251206172719: epoch=94 step=94000 epoch=94 metrics={'time_sample_batch': 0.001905643939971924, 'time_algorithm_update': 0.007433764457702637, 'vae_loss': 0.014505549505818635, 'time_step': 0.009578096866607665, 'td_error': 0.4598024042832641, 'value_scale': -0.04830722692082984, 'discounted_advantage': 0.0010996792038056474, 'initial_state': -0.04304216057062149, 'diff_eval': 501.4324166720025} step=94000
2025-12-06 17:49.38 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_94000.d3


Epoch 95/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.41it/s, vae_loss=0.0145]


2025-12-06 17:49.52 [info     ] PLASWithPerturbation_20251206172719: epoch=95 step=95000 epoch=95 metrics={'time_sample_batch': 0.001899526834487915, 'time_algorithm_update': 0.007366757869720459, 'vae_loss': 0.014510524569544941, 'time_step': 0.009491167783737183, 'td_error': 0.4594836614135946, 'value_scale': -0.04860945987653328, 'discounted_advantage': 0.0027842566536581343, 'initial_state': -0.04295574128627777, 'diff_eval': 523.5533439597797} step=95000
2025-12-06 17:49.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_95000.d3


Epoch 96/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.06it/s, vae_loss=0.0144]


2025-12-06 17:50.06 [info     ] PLASWithPerturbation_20251206172719: epoch=96 step=96000 epoch=96 metrics={'time_sample_batch': 0.001875840425491333, 'time_algorithm_update': 0.007414284229278564, 'vae_loss': 0.014427110341377556, 'time_step': 0.009519273042678833, 'td_error': 0.45904370705049813, 'value_scale': -0.04901100487400655, 'discounted_advantage': 0.006823874875968297, 'initial_state': -0.042856235057115555, 'diff_eval': 571.3677821632032} step=96000
2025-12-06 17:50.07 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_96000.d3


Epoch 97/200: 100%|██████████| 1000/1000 [00:10<00:00, 98.60it/s, vae_loss=0.0144]


2025-12-06 17:50.21 [info     ] PLASWithPerturbation_20251206172719: epoch=97 step=97000 epoch=97 metrics={'time_sample_batch': 0.0022913947105407716, 'time_algorithm_update': 0.007415624618530273, 'vae_loss': 0.014379394226241857, 'time_step': 0.009944414854049682, 'td_error': 0.45913477509079936, 'value_scale': -0.04892989084841207, 'discounted_advantage': 0.005087517898676785, 'initial_state': -0.04295390099287033, 'diff_eval': 556.1816686852625} step=97000
2025-12-06 17:50.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_97000.d3


Epoch 98/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.66it/s, vae_loss=0.0144]


2025-12-06 17:50.35 [info     ] PLASWithPerturbation_20251206172719: epoch=98 step=98000 epoch=98 metrics={'time_sample_batch': 0.0019043793678283692, 'time_algorithm_update': 0.007383844614028931, 'vae_loss': 0.014433526349253952, 'time_step': 0.00953798222541809, 'td_error': 0.4587596707051565, 'value_scale': -0.04921177853826256, 'discounted_advantage': 0.008514434993068642, 'initial_state': -0.04268549010157585, 'diff_eval': 586.2817640410761} step=98000
2025-12-06 17:50.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_98000.d3


Epoch 99/200: 100%|██████████| 1000/1000 [00:10<00:00, 98.07it/s, vae_loss=0.014] 


2025-12-06 17:50.50 [info     ] PLASWithPerturbation_20251206172719: epoch=99 step=99000 epoch=99 metrics={'time_sample_batch': 0.001977482795715332, 'time_algorithm_update': 0.007715583562850952, 'vae_loss': 0.014002957766409963, 'time_step': 0.009981795310974121, 'td_error': 0.45891292879975204, 'value_scale': -0.04914630660342251, 'discounted_advantage': 0.006202055257495544, 'initial_state': -0.04313396289944649, 'diff_eval': 558.6099239672924} step=99000
2025-12-06 17:50.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_99000.d3


Epoch 100/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.55it/s, vae_loss=0.0141]


2025-12-06 17:51.04 [info     ] PLASWithPerturbation_20251206172719: epoch=100 step=100000 epoch=100 metrics={'time_sample_batch': 0.0019290590286254884, 'time_algorithm_update': 0.007372776508331299, 'vae_loss': 0.014126652311068029, 'time_step': 0.00956063413619995, 'td_error': 0.4585390913928251, 'value_scale': -0.04944547181535272, 'discounted_advantage': 0.008858749128113226, 'initial_state': -0.04323720932006836, 'diff_eval': 569.8222126911002} step=100000
2025-12-06 17:51.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_100000.d3


Epoch 101/200: 100%|██████████| 1000/1000 [00:09<00:00, 107.21it/s, vae_loss=0.0142]


2025-12-06 17:51.18 [info     ] PLASWithPerturbation_20251206172719: epoch=101 step=101000 epoch=101 metrics={'time_sample_batch': 0.001757404327392578, 'time_algorithm_update': 0.007186179399490357, 'vae_loss': 0.014159654397517443, 'time_step': 0.009157163381576539, 'td_error': 0.4581500652861476, 'value_scale': -0.04976500746421672, 'discounted_advantage': 0.010840118993318578, 'initial_state': -0.04325981065630913, 'diff_eval': 609.6652294473804} step=101000
2025-12-06 17:51.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_101000.d3


Epoch 102/200: 100%|██████████| 1000/1000 [00:10<00:00, 95.35it/s, vae_loss=0.0139]


2025-12-06 17:51.32 [info     ] PLASWithPerturbation_20251206172719: epoch=102 step=102000 epoch=102 metrics={'time_sample_batch': 0.0018880348205566407, 'time_algorithm_update': 0.00815176033973694, 'vae_loss': 0.013849597189575433, 'time_step': 0.010288130283355713, 'td_error': 0.4594499607673115, 'value_scale': -0.04863131799198818, 'discounted_advantage': 0.003679583554051484, 'initial_state': -0.04266878962516785, 'diff_eval': 519.2604832925075} step=102000
2025-12-06 17:51.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_102000.d3


Epoch 103/200: 100%|██████████| 1000/1000 [00:09<00:00, 106.68it/s, vae_loss=0.0138]


2025-12-06 17:51.46 [info     ] PLASWithPerturbation_20251206172719: epoch=103 step=103000 epoch=103 metrics={'time_sample_batch': 0.0018094234466552734, 'time_algorithm_update': 0.007157614469528198, 'vae_loss': 0.013799721261952072, 'time_step': 0.009198572874069213, 'td_error': 0.45965457431673773, 'value_scale': -0.04844086778124565, 'discounted_advantage': 0.0006189757326703635, 'initial_state': -0.042784448713064194, 'diff_eval': 512.1202601847954} step=103000
2025-12-06 17:51.46 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_103000.d3


Epoch 104/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.02it/s, vae_loss=0.0141]


2025-12-06 17:52.00 [info     ] PLASWithPerturbation_20251206172719: epoch=104 step=104000 epoch=104 metrics={'time_sample_batch': 0.0019232008457183837, 'time_algorithm_update': 0.007467018604278564, 'vae_loss': 0.014055282265879213, 'time_step': 0.00961683464050293, 'td_error': 0.45970654506277164, 'value_scale': -0.048377486137081524, 'discounted_advantage': 0.0017124072076290194, 'initial_state': -0.04271551966667175, 'diff_eval': 493.29257956546087} step=104000
2025-12-06 17:52.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_104000.d3


Epoch 105/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.65it/s, vae_loss=0.0137]


2025-12-06 17:52.15 [info     ] PLASWithPerturbation_20251206172719: epoch=105 step=105000 epoch=105 metrics={'time_sample_batch': 0.0018931074142456055, 'time_algorithm_update': 0.007545446872711182, 'vae_loss': 0.013645043943542988, 'time_step': 0.009653159379959107, 'td_error': 0.4580772082891064, 'value_scale': -0.04980653671825684, 'discounted_advantage': 0.010226536850205158, 'initial_state': -0.04311660677194595, 'diff_eval': 612.2150954082174} step=105000
2025-12-06 17:52.15 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_105000.d3


Epoch 106/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.39it/s, vae_loss=0.0135]


2025-12-06 17:52.29 [info     ] PLASWithPerturbation_20251206172719: epoch=106 step=106000 epoch=106 metrics={'time_sample_batch': 0.0018785746097564698, 'time_algorithm_update': 0.007356858015060425, 'vae_loss': 0.013530438913498075, 'time_step': 0.009482983589172364, 'td_error': 0.4596764214164557, 'value_scale': -0.048381790453966234, 'discounted_advantage': 0.0014135152178806813, 'initial_state': -0.04296674579381943, 'diff_eval': 487.4744696474038} step=106000
2025-12-06 17:52.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_106000.d3


Epoch 107/200: 100%|██████████| 1000/1000 [00:10<00:00, 99.24it/s, vae_loss=0.0135]


2025-12-06 17:52.43 [info     ] PLASWithPerturbation_20251206172719: epoch=107 step=107000 epoch=107 metrics={'time_sample_batch': 0.001972651958465576, 'time_algorithm_update': 0.007672137260437012, 'vae_loss': 0.013534214919432998, 'time_step': 0.009884625673294067, 'td_error': 0.45920839301274546, 'value_scale': -0.04880961143253223, 'discounted_advantage': 0.004875643256556139, 'initial_state': -0.0426488034427166, 'diff_eval': 497.57604433914605} step=107000
2025-12-06 17:52.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_107000.d3


Epoch 108/200: 100%|██████████| 1000/1000 [00:09<00:00, 106.33it/s, vae_loss=0.0136]


2025-12-06 17:52.57 [info     ] PLASWithPerturbation_20251206172719: epoch=108 step=108000 epoch=108 metrics={'time_sample_batch': 0.0018355076313018798, 'time_algorithm_update': 0.0071801462173461915, 'vae_loss': 0.013570450538769364, 'time_step': 0.009234837293624877, 'td_error': 0.45946429848289577, 'value_scale': -0.048563849245340934, 'discounted_advantage': 0.0033073184117584785, 'initial_state': -0.042505234479904175, 'diff_eval': 509.3042940962561} step=108000
2025-12-06 17:52.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_108000.d3


Epoch 109/200: 100%|██████████| 1000/1000 [00:10<00:00, 97.95it/s, vae_loss=0.0136]


2025-12-06 17:53.11 [info     ] PLASWithPerturbation_20251206172719: epoch=109 step=109000 epoch=109 metrics={'time_sample_batch': 0.0020321669578552246, 'time_algorithm_update': 0.007740429162979126, 'vae_loss': 0.013583526029717177, 'time_step': 0.010009839773178101, 'td_error': 0.45895686553171666, 'value_scale': -0.04902959537199774, 'discounted_advantage': 0.005736265040689197, 'initial_state': -0.04306931793689728, 'diff_eval': 507.0939232619001} step=109000
2025-12-06 17:53.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_109000.d3


Epoch 110/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.55it/s, vae_loss=0.0136]


2025-12-06 17:53.26 [info     ] PLASWithPerturbation_20251206172719: epoch=110 step=110000 epoch=110 metrics={'time_sample_batch': 0.00193096923828125, 'time_algorithm_update': 0.0074199681282043455, 'vae_loss': 0.013606073561124504, 'time_step': 0.00957486343383789, 'td_error': 0.45893463970423354, 'value_scale': -0.04903035059419819, 'discounted_advantage': 0.005896105836477563, 'initial_state': -0.04282546043395996, 'diff_eval': 521.2397545602322} step=110000
2025-12-06 17:53.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_110000.d3


Epoch 111/200: 100%|██████████| 1000/1000 [00:10<00:00, 98.68it/s, vae_loss=0.0134]


2025-12-06 17:53.40 [info     ] PLASWithPerturbation_20251206172719: epoch=111 step=111000 epoch=111 metrics={'time_sample_batch': 0.0020190815925598144, 'time_algorithm_update': 0.007700811386108399, 'vae_loss': 0.013420432936865836, 'time_step': 0.009951929330825806, 'td_error': 0.45859999866292517, 'value_scale': -0.049320974859135024, 'discounted_advantage': 0.00853705790669498, 'initial_state': -0.04298866167664528, 'diff_eval': 541.2450699797802} step=111000
2025-12-06 17:53.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_111000.d3


Epoch 112/200: 100%|██████████| 1000/1000 [00:10<00:00, 97.93it/s, vae_loss=0.0134]


2025-12-06 17:53.55 [info     ] PLASWithPerturbation_20251206172719: epoch=112 step=112000 epoch=112 metrics={'time_sample_batch': 0.0019525578022003174, 'time_algorithm_update': 0.007808298587799072, 'vae_loss': 0.013446796064730733, 'time_step': 0.010004310131072999, 'td_error': 0.45940222797515556, 'value_scale': -0.04864455594221397, 'discounted_advantage': 0.003833558709610311, 'initial_state': -0.04289741441607475, 'diff_eval': 480.31401562710937} step=112000
2025-12-06 17:53.55 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_112000.d3


Epoch 113/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.56it/s, vae_loss=0.0131]


2025-12-06 17:54.09 [info     ] PLASWithPerturbation_20251206172719: epoch=113 step=113000 epoch=113 metrics={'time_sample_batch': 0.001845608949661255, 'time_algorithm_update': 0.007472848415374756, 'vae_loss': 0.013139817468822002, 'time_step': 0.009564433813095092, 'td_error': 0.45868976642989207, 'value_scale': -0.04924263597379399, 'discounted_advantage': 0.005658519018719798, 'initial_state': -0.04315440356731415, 'diff_eval': 539.9805633981044} step=113000
2025-12-06 17:54.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_113000.d3


Epoch 114/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.41it/s, vae_loss=0.0132]


2025-12-06 17:54.23 [info     ] PLASWithPerturbation_20251206172719: epoch=114 step=114000 epoch=114 metrics={'time_sample_batch': 0.0019479706287384034, 'time_algorithm_update': 0.007504570484161377, 'vae_loss': 0.013244673051405698, 'time_step': 0.00967692255973816, 'td_error': 0.45992322306739286, 'value_scale': -0.04817381861080218, 'discounted_advantage': 0.002297631790300686, 'initial_state': -0.042185619473457336, 'diff_eval': 491.43563395512405} step=114000
2025-12-06 17:54.23 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_114000.d3


Epoch 115/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.85it/s, vae_loss=0.0133]


2025-12-06 17:54.37 [info     ] PLASWithPerturbation_20251206172719: epoch=115 step=115000 epoch=115 metrics={'time_sample_batch': 0.0019049243927001954, 'time_algorithm_update': 0.007396480560302734, 'vae_loss': 0.013325459748040884, 'time_step': 0.009534048557281493, 'td_error': 0.45836426319627854, 'value_scale': -0.0495557190006488, 'discounted_advantage': 0.009957428272979046, 'initial_state': -0.04308716952800751, 'diff_eval': 539.4981549441708} step=115000
2025-12-06 17:54.38 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_115000.d3


Epoch 116/200: 100%|██████████| 1000/1000 [00:10<00:00, 98.19it/s, vae_loss=0.0129]


2025-12-06 17:54.52 [info     ] PLASWithPerturbation_20251206172719: epoch=116 step=116000 epoch=116 metrics={'time_sample_batch': 0.0020465424060821533, 'time_algorithm_update': 0.007707015991210937, 'vae_loss': 0.01294116337504238, 'time_step': 0.009993200302124023, 'td_error': 0.45750761093868275, 'value_scale': -0.05040547935243724, 'discounted_advantage': 0.014992238098376194, 'initial_state': -0.04376750811934471, 'diff_eval': 722.4103916423747} step=116000
2025-12-06 17:54.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_116000.d3


Epoch 117/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.11it/s, vae_loss=0.0128]


2025-12-06 17:55.06 [info     ] PLASWithPerturbation_20251206172719: epoch=117 step=117000 epoch=117 metrics={'time_sample_batch': 0.0019171452522277832, 'time_algorithm_update': 0.007566632270812989, 'vae_loss': 0.012839072225149722, 'time_step': 0.009712183713912964, 'td_error': 0.458549482134152, 'value_scale': -0.049420615741694214, 'discounted_advantage': 0.007956860540584214, 'initial_state': -0.04350252449512482, 'diff_eval': 555.2026542937647} step=117000
2025-12-06 17:55.07 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_117000.d3


Epoch 118/200: 100%|██████████| 1000/1000 [00:09<00:00, 105.23it/s, vae_loss=0.0127]


2025-12-06 17:55.20 [info     ] PLASWithPerturbation_20251206172719: epoch=118 step=118000 epoch=118 metrics={'time_sample_batch': 0.0018650977611541749, 'time_algorithm_update': 0.007237610816955567, 'vae_loss': 0.012730745249893516, 'time_step': 0.009328311443328857, 'td_error': 0.4588678156148963, 'value_scale': -0.04912219716513909, 'discounted_advantage': 0.006421201949421172, 'initial_state': -0.04291846230626106, 'diff_eval': 507.1954381733622} step=118000
2025-12-06 17:55.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_118000.d3


Epoch 119/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.50it/s, vae_loss=0.0128]


2025-12-06 17:55.35 [info     ] PLASWithPerturbation_20251206172719: epoch=119 step=119000 epoch=119 metrics={'time_sample_batch': 0.0018941128253936767, 'time_algorithm_update': 0.0075222070217132566, 'vae_loss': 0.012747148564085365, 'time_step': 0.0096589035987854, 'td_error': 0.4589846485095003, 'value_scale': -0.048983408688518394, 'discounted_advantage': 0.005417953343218932, 'initial_state': -0.043008994311094284, 'diff_eval': 487.4768055003013} step=119000
2025-12-06 17:55.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_119000.d3


Epoch 120/200: 100%|██████████| 1000/1000 [00:10<00:00, 99.93it/s, vae_loss=0.0129]


2025-12-06 17:55.49 [info     ] PLASWithPerturbation_20251206172719: epoch=120 step=120000 epoch=120 metrics={'time_sample_batch': 0.0019456665515899657, 'time_algorithm_update': 0.007640106201171875, 'vae_loss': 0.01289939686935395, 'time_step': 0.009817936658859253, 'td_error': 0.4590206214004108, 'value_scale': -0.04891260837814446, 'discounted_advantage': 0.005587197100998507, 'initial_state': -0.04276677966117859, 'diff_eval': 461.2969256971328} step=120000
2025-12-06 17:55.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_120000.d3


Epoch 121/200: 100%|██████████| 1000/1000 [00:10<00:00, 98.59it/s, vae_loss=0.0132]


2025-12-06 17:56.04 [info     ] PLASWithPerturbation_20251206172719: epoch=121 step=121000 epoch=121 metrics={'time_sample_batch': 0.0020117571353912354, 'time_algorithm_update': 0.007719585180282593, 'vae_loss': 0.01316006871405989, 'time_step': 0.009963504791259766, 'td_error': 0.459043567766989, 'value_scale': -0.04893686731516915, 'discounted_advantage': 0.005250057490509041, 'initial_state': -0.04282965511083603, 'diff_eval': 459.5831692083352} step=121000
2025-12-06 17:56.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_121000.d3


Epoch 122/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.52it/s, vae_loss=0.0126]


2025-12-06 17:56.18 [info     ] PLASWithPerturbation_20251206172719: epoch=122 step=122000 epoch=122 metrics={'time_sample_batch': 0.0018922266960144042, 'time_algorithm_update': 0.007341459989547729, 'vae_loss': 0.012576244196854531, 'time_step': 0.009474689483642579, 'td_error': 0.4599142552498529, 'value_scale': -0.04804387529695224, 'discounted_advantage': -0.0009849743104518337, 'initial_state': -0.04226815700531006, 'diff_eval': 455.4620971854369} step=122000
2025-12-06 17:56.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_122000.d3


Epoch 123/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.56it/s, vae_loss=0.0126]


2025-12-06 17:56.32 [info     ] PLASWithPerturbation_20251206172719: epoch=123 step=123000 epoch=123 metrics={'time_sample_batch': 0.0018644049167633057, 'time_algorithm_update': 0.0073448350429534915, 'vae_loss': 0.012569174787029624, 'time_step': 0.009460457086563111, 'td_error': 0.45863611701605866, 'value_scale': -0.04926905520877052, 'discounted_advantage': 0.005945034643314932, 'initial_state': -0.043187759816646576, 'diff_eval': 514.3835290455926} step=123000
2025-12-06 17:56.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_123000.d3


Epoch 124/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.20it/s, vae_loss=0.0123]


2025-12-06 17:56.46 [info     ] PLASWithPerturbation_20251206172719: epoch=124 step=124000 epoch=124 metrics={'time_sample_batch': 0.0019008240699768066, 'time_algorithm_update': 0.007391011476516724, 'vae_loss': 0.012344700916670262, 'time_step': 0.009510473489761353, 'td_error': 0.45820280330504615, 'value_scale': -0.04980349866165924, 'discounted_advantage': 0.011615712092636423, 'initial_state': -0.04384304955601692, 'diff_eval': 620.1701973686053} step=124000
2025-12-06 17:56.46 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_124000.d3


Epoch 125/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.61it/s, vae_loss=0.0124]


2025-12-06 17:57.00 [info     ] PLASWithPerturbation_20251206172719: epoch=125 step=125000 epoch=125 metrics={'time_sample_batch': 0.0019263474941253662, 'time_algorithm_update': 0.007402743339538574, 'vae_loss': 0.012461629123426974, 'time_step': 0.009569611310958862, 'td_error': 0.45905774345206973, 'value_scale': -0.04893767518168573, 'discounted_advantage': 0.005354228680229491, 'initial_state': -0.04297701269388199, 'diff_eval': 486.86123911432287} step=125000
2025-12-06 17:57.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_125000.d3


Epoch 126/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.38it/s, vae_loss=0.0122]


2025-12-06 17:57.14 [info     ] PLASWithPerturbation_20251206172719: epoch=126 step=126000 epoch=126 metrics={'time_sample_batch': 0.0018897500038146972, 'time_algorithm_update': 0.0073753206729888915, 'vae_loss': 0.012164039955008775, 'time_step': 0.009492939472198486, 'td_error': 0.45975309095257993, 'value_scale': -0.0481894710194866, 'discounted_advantage': -0.00019977615415765036, 'initial_state': -0.04278866946697235, 'diff_eval': 439.8317260736568} step=126000
2025-12-06 17:57.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_126000.d3


Epoch 127/200: 100%|██████████| 1000/1000 [00:09<00:00, 104.10it/s, vae_loss=0.0121]


2025-12-06 17:57.28 [info     ] PLASWithPerturbation_20251206172719: epoch=127 step=127000 epoch=127 metrics={'time_sample_batch': 0.0018659236431121827, 'time_algorithm_update': 0.007342200756072998, 'vae_loss': 0.01213924775738269, 'time_step': 0.00942661166191101, 'td_error': 0.45842925808333174, 'value_scale': -0.04957479579071299, 'discounted_advantage': 0.009505443232308545, 'initial_state': -0.04321005940437317, 'diff_eval': 576.3352394070479} step=127000
2025-12-06 17:57.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_127000.d3


Epoch 128/200: 100%|██████████| 1000/1000 [00:09<00:00, 100.65it/s, vae_loss=0.0122]


2025-12-06 17:57.42 [info     ] PLASWithPerturbation_20251206172719: epoch=128 step=128000 epoch=128 metrics={'time_sample_batch': 0.001964395046234131, 'time_algorithm_update': 0.007538913249969482, 'vae_loss': 0.012190725414548069, 'time_step': 0.009747838258743285, 'td_error': 0.45849079934001763, 'value_scale': -0.04945608721585431, 'discounted_advantage': 0.008402670102407073, 'initial_state': -0.04344562068581581, 'diff_eval': 509.97345452098193} step=128000
2025-12-06 17:57.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_128000.d3


Epoch 129/200: 100%|██████████| 1000/1000 [00:09<00:00, 104.55it/s, vae_loss=0.0121]


2025-12-06 17:57.57 [info     ] PLASWithPerturbation_20251206172719: epoch=129 step=129000 epoch=129 metrics={'time_sample_batch': 0.001858790636062622, 'time_algorithm_update': 0.007273459434509277, 'vae_loss': 0.012071014101617037, 'time_step': 0.009377215147018433, 'td_error': 0.45898815681817184, 'value_scale': -0.04892906235618406, 'discounted_advantage': 0.005164358122020283, 'initial_state': -0.04277636855840683, 'diff_eval': 455.54881551107053} step=129000
2025-12-06 17:57.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_129000.d3


Epoch 130/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.35it/s, vae_loss=0.0121]


2025-12-06 17:58.11 [info     ] PLASWithPerturbation_20251206172719: epoch=130 step=130000 epoch=130 metrics={'time_sample_batch': 0.0018873991966247559, 'time_algorithm_update': 0.007445865631103515, 'vae_loss': 0.012114329555537552, 'time_step': 0.009569771766662597, 'td_error': 0.45969961208361637, 'value_scale': -0.0483596528867027, 'discounted_advantage': 0.0016535853372731531, 'initial_state': -0.042847875505685806, 'diff_eval': 427.7425218439492} step=130000
2025-12-06 17:58.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_130000.d3


Epoch 131/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.28it/s, vae_loss=0.0121]


2025-12-06 17:58.25 [info     ] PLASWithPerturbation_20251206172719: epoch=131 step=131000 epoch=131 metrics={'time_sample_batch': 0.0019457099437713624, 'time_algorithm_update': 0.007505676031112671, 'vae_loss': 0.012062121569178998, 'time_step': 0.009681082248687745, 'td_error': 0.45918647374532584, 'value_scale': -0.04878623692954251, 'discounted_advantage': 0.005056638537369707, 'initial_state': -0.04224160313606262, 'diff_eval': 467.071197398313} step=131000
2025-12-06 17:58.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_131000.d3


Epoch 132/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.51it/s, vae_loss=0.0119]


2025-12-06 17:58.39 [info     ] PLASWithPerturbation_20251206172719: epoch=132 step=132000 epoch=132 metrics={'time_sample_batch': 0.0019153504371643066, 'time_algorithm_update': 0.007411198616027832, 'vae_loss': 0.011915101974271237, 'time_step': 0.009559070587158203, 'td_error': 0.4594880568431176, 'value_scale': -0.04849825712187842, 'discounted_advantage': 0.004290943876916753, 'initial_state': -0.042430661618709564, 'diff_eval': 436.8268669576749} step=132000
2025-12-06 17:58.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_132000.d3


Epoch 133/200: 100%|██████████| 1000/1000 [00:10<00:00, 99.28it/s, vae_loss=0.012] 


2025-12-06 17:58.54 [info     ] PLASWithPerturbation_20251206172719: epoch=133 step=133000 epoch=133 metrics={'time_sample_batch': 0.00196687126159668, 'time_algorithm_update': 0.007686113119125366, 'vae_loss': 0.011953143630642444, 'time_step': 0.00988822317123413, 'td_error': 0.4593513946587445, 'value_scale': -0.04855208365716608, 'discounted_advantage': 0.0024343218218906525, 'initial_state': -0.0428486242890358, 'diff_eval': 424.6585783954748} step=133000
2025-12-06 17:58.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_133000.d3


Epoch 134/200: 100%|██████████| 1000/1000 [00:09<00:00, 104.71it/s, vae_loss=0.0119]


2025-12-06 17:59.08 [info     ] PLASWithPerturbation_20251206172719: epoch=134 step=134000 epoch=134 metrics={'time_sample_batch': 0.0018644070625305177, 'time_algorithm_update': 0.007271651744842529, 'vae_loss': 0.01188323163241148, 'time_step': 0.009371750593185424, 'td_error': 0.4586673040537683, 'value_scale': -0.04930135963971774, 'discounted_advantage': 0.00748435024019806, 'initial_state': -0.04348579794168472, 'diff_eval': 481.87277369191776} step=134000
2025-12-06 17:59.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_134000.d3


Epoch 135/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.63it/s, vae_loss=0.0119]


2025-12-06 17:59.22 [info     ] PLASWithPerturbation_20251206172719: epoch=135 step=135000 epoch=135 metrics={'time_sample_batch': 0.0018890960216522217, 'time_algorithm_update': 0.007429736137390137, 'vae_loss': 0.011867578228469938, 'time_step': 0.009556356191635132, 'td_error': 0.45840802955114296, 'value_scale': -0.04956015304889458, 'discounted_advantage': 0.008106527821177994, 'initial_state': -0.0436708889901638, 'diff_eval': 522.5969224395071} step=135000
2025-12-06 17:59.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_135000.d3


Epoch 136/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.32it/s, vae_loss=0.0119]


2025-12-06 17:59.36 [info     ] PLASWithPerturbation_20251206172719: epoch=136 step=136000 epoch=136 metrics={'time_sample_batch': 0.001931035041809082, 'time_algorithm_update': 0.007305113077163697, 'vae_loss': 0.011918878407683223, 'time_step': 0.009486391305923462, 'td_error': 0.45990123411637296, 'value_scale': -0.04815596041456177, 'discounted_advantage': -0.00036694744424553967, 'initial_state': -0.04289025440812111, 'diff_eval': 414.07837944025135} step=136000
2025-12-06 17:59.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_136000.d3


Epoch 137/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.41it/s, vae_loss=0.0117]


2025-12-06 17:59.50 [info     ] PLASWithPerturbation_20251206172719: epoch=137 step=137000 epoch=137 metrics={'time_sample_batch': 0.0019592700004577635, 'time_algorithm_update': 0.007372862339019776, 'vae_loss': 0.0116452258634381, 'time_step': 0.0095752112865448, 'td_error': 0.45965160982746267, 'value_scale': -0.048372338532196156, 'discounted_advantage': 0.0023596452729176074, 'initial_state': -0.042638398706912994, 'diff_eval': 410.6588482785696} step=137000
2025-12-06 17:59.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_137000.d3


Epoch 138/200: 100%|██████████| 1000/1000 [00:09<00:00, 100.49it/s, vae_loss=0.0116]


2025-12-06 18:00.04 [info     ] PLASWithPerturbation_20251206172719: epoch=138 step=138000 epoch=138 metrics={'time_sample_batch': 0.0019709396362304686, 'time_algorithm_update': 0.007486400604248047, 'vae_loss': 0.011578419629484415, 'time_step': 0.009722090721130372, 'td_error': 0.45915580777847304, 'value_scale': -0.04878179090174051, 'discounted_advantage': 0.0042718314126510585, 'initial_state': -0.04279117286205292, 'diff_eval': 417.23813304851564} step=138000
2025-12-06 18:00.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_138000.d3


Epoch 139/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.26it/s, vae_loss=0.0115]


2025-12-06 18:00.18 [info     ] PLASWithPerturbation_20251206172719: epoch=139 step=139000 epoch=139 metrics={'time_sample_batch': 0.0019358327388763427, 'time_algorithm_update': 0.007329859256744385, 'vae_loss': 0.011556787932291627, 'time_step': 0.009494769334793091, 'td_error': 0.45930577273537887, 'value_scale': -0.04873806354531211, 'discounted_advantage': 0.003886092154279473, 'initial_state': -0.0427715890109539, 'diff_eval': 448.3265326907737} step=139000
2025-12-06 18:00.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_139000.d3


Epoch 140/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.69it/s, vae_loss=0.0116]


2025-12-06 18:00.33 [info     ] PLASWithPerturbation_20251206172719: epoch=140 step=140000 epoch=140 metrics={'time_sample_batch': 0.0018852264881134033, 'time_algorithm_update': 0.007530743360519409, 'vae_loss': 0.011603738364763558, 'time_step': 0.009647875547409058, 'td_error': 0.4587719250350121, 'value_scale': -0.04923899126319529, 'discounted_advantage': 0.007062506554890725, 'initial_state': -0.04324430972337723, 'diff_eval': 473.08940115021596} step=140000
2025-12-06 18:00.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_140000.d3


Epoch 141/200: 100%|██████████| 1000/1000 [00:10<00:00, 97.83it/s, vae_loss=0.0116]


2025-12-06 18:00.48 [info     ] PLASWithPerturbation_20251206172719: epoch=141 step=141000 epoch=141 metrics={'time_sample_batch': 0.0021689667701721193, 'time_algorithm_update': 0.007632816314697265, 'vae_loss': 0.011566866321489216, 'time_step': 0.010033884525299071, 'td_error': 0.4596826056235719, 'value_scale': -0.04829341897152346, 'discounted_advantage': 0.0008908206894561288, 'initial_state': -0.042747583240270615, 'diff_eval': 400.06353392062965} step=141000
2025-12-06 18:00.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_141000.d3


Epoch 142/200: 100%|██████████| 1000/1000 [00:09<00:00, 100.13it/s, vae_loss=0.0117]


2025-12-06 18:01.03 [info     ] PLASWithPerturbation_20251206172719: epoch=142 step=142000 epoch=142 metrics={'time_sample_batch': 0.0018869304656982421, 'time_algorithm_update': 0.007675701379776001, 'vae_loss': 0.011697466861456632, 'time_step': 0.009803216934204101, 'td_error': 0.4593832426796458, 'value_scale': -0.048586931435605205, 'discounted_advantage': 0.0024003103521242934, 'initial_state': -0.04295124113559723, 'diff_eval': 401.3262926702404} step=142000
2025-12-06 18:01.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_142000.d3


Epoch 143/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.20it/s, vae_loss=0.0112]


2025-12-06 18:01.17 [info     ] PLASWithPerturbation_20251206172719: epoch=143 step=143000 epoch=143 metrics={'time_sample_batch': 0.0019124631881713868, 'time_algorithm_update': 0.0073716800212860105, 'vae_loss': 0.011242505498230457, 'time_step': 0.009505415916442871, 'td_error': 0.45852950925533614, 'value_scale': -0.04941659879090837, 'discounted_advantage': 0.00914309949976585, 'initial_state': -0.04331568256020546, 'diff_eval': 471.09344023037664} step=143000
2025-12-06 18:01.17 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_143000.d3


Epoch 144/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.50it/s, vae_loss=0.0112]


2025-12-06 18:01.31 [info     ] PLASWithPerturbation_20251206172719: epoch=144 step=144000 epoch=144 metrics={'time_sample_batch': 0.0018545312881469726, 'time_algorithm_update': 0.007376277446746826, 'vae_loss': 0.011184367001056671, 'time_step': 0.009475745916366577, 'td_error': 0.4592321322244966, 'value_scale': -0.04875093988476248, 'discounted_advantage': 0.004267981692689963, 'initial_state': -0.043172258883714676, 'diff_eval': 411.682075451858} step=144000
2025-12-06 18:01.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_144000.d3


Epoch 145/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.46it/s, vae_loss=0.0112]


2025-12-06 18:01.45 [info     ] PLASWithPerturbation_20251206172719: epoch=145 step=145000 epoch=145 metrics={'time_sample_batch': 0.001941307544708252, 'time_algorithm_update': 0.007413468599319458, 'vae_loss': 0.011234544279053807, 'time_step': 0.009582920551300049, 'td_error': 0.4599532110697639, 'value_scale': -0.048104419759010204, 'discounted_advantage': -0.00033523971781199705, 'initial_state': -0.04260633513331413, 'diff_eval': 401.7180755783958} step=145000
2025-12-06 18:01.45 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_145000.d3


Epoch 146/200: 100%|██████████| 1000/1000 [00:10<00:00, 96.73it/s, vae_loss=0.0114]


2025-12-06 18:02.00 [info     ] PLASWithPerturbation_20251206172719: epoch=146 step=146000 epoch=146 metrics={'time_sample_batch': 0.002084151029586792, 'time_algorithm_update': 0.0078017480373382565, 'vae_loss': 0.011350942535325885, 'time_step': 0.010132589101791382, 'td_error': 0.4592009600765371, 'value_scale': -0.04882853037161692, 'discounted_advantage': 0.004320591154833091, 'initial_state': -0.04320669546723366, 'diff_eval': 419.6275506814038} step=146000
2025-12-06 18:02.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_146000.d3


Epoch 147/200: 100%|██████████| 1000/1000 [00:10<00:00, 99.20it/s, vae_loss=0.0109]


2025-12-06 18:02.14 [info     ] PLASWithPerturbation_20251206172719: epoch=147 step=147000 epoch=147 metrics={'time_sample_batch': 0.0019922549724578856, 'time_algorithm_update': 0.007657775640487671, 'vae_loss': 0.010923286120407284, 'time_step': 0.009890833854675292, 'td_error': 0.45945812989443124, 'value_scale': -0.04859845901348402, 'discounted_advantage': 0.0027938786772734466, 'initial_state': -0.04307227581739426, 'diff_eval': 401.03135319280034} step=147000
2025-12-06 18:02.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_147000.d3


Epoch 148/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.78it/s, vae_loss=0.011]


2025-12-06 18:02.28 [info     ] PLASWithPerturbation_20251206172719: epoch=148 step=148000 epoch=148 metrics={'time_sample_batch': 0.0018724596500396728, 'time_algorithm_update': 0.007359730005264282, 'vae_loss': 0.010991366296540945, 'time_step': 0.009458989381790161, 'td_error': 0.45957774419381936, 'value_scale': -0.04846965592496615, 'discounted_advantage': 0.0019428740335189641, 'initial_state': -0.04300132766366005, 'diff_eval': 404.70892276666274} step=148000
2025-12-06 18:02.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_148000.d3


Epoch 149/200: 100%|██████████| 1000/1000 [00:09<00:00, 104.36it/s, vae_loss=0.011]


2025-12-06 18:02.42 [info     ] PLASWithPerturbation_20251206172719: epoch=149 step=149000 epoch=149 metrics={'time_sample_batch': 0.0018782126903533936, 'time_algorithm_update': 0.007285918951034546, 'vae_loss': 0.011047514613252133, 'time_step': 0.00938752031326294, 'td_error': 0.45913377600123273, 'value_scale': -0.04884213770951292, 'discounted_advantage': 0.0048875229818261855, 'initial_state': -0.04301363602280617, 'diff_eval': 403.1631507066024} step=149000
2025-12-06 18:02.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_149000.d3


Epoch 150/200: 100%|██████████| 1000/1000 [00:09<00:00, 100.50it/s, vae_loss=0.0108]


2025-12-06 18:02.56 [info     ] PLASWithPerturbation_20251206172719: epoch=150 step=150000 epoch=150 metrics={'time_sample_batch': 0.0019877727031707764, 'time_algorithm_update': 0.007545769453048706, 'vae_loss': 0.01079655880201608, 'time_step': 0.009751601934432983, 'td_error': 0.4586628139242675, 'value_scale': -0.049199073029758546, 'discounted_advantage': 0.008112585280335679, 'initial_state': -0.043046072125434875, 'diff_eval': 431.06810395046995} step=150000
2025-12-06 18:02.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_150000.d3


Epoch 151/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.03it/s, vae_loss=0.0108]


2025-12-06 18:03.10 [info     ] PLASWithPerturbation_20251206172719: epoch=151 step=151000 epoch=151 metrics={'time_sample_batch': 0.0019239749908447265, 'time_algorithm_update': 0.007441887378692627, 'vae_loss': 0.01082127776183188, 'time_step': 0.009605063438415527, 'td_error': 0.4599033981958254, 'value_scale': -0.04805266145501233, 'discounted_advantage': -0.001122544954422538, 'initial_state': -0.04278881102800369, 'diff_eval': 375.40455296267766} step=151000
2025-12-06 18:03.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_151000.d3


Epoch 152/200: 100%|██████████| 1000/1000 [00:09<00:00, 106.09it/s, vae_loss=0.0109]


2025-12-06 18:03.24 [info     ] PLASWithPerturbation_20251206172719: epoch=152 step=152000 epoch=152 metrics={'time_sample_batch': 0.0018242323398590088, 'time_algorithm_update': 0.0072061922550201414, 'vae_loss': 0.010853908838238568, 'time_step': 0.009252978563308716, 'td_error': 0.45880876766951034, 'value_scale': -0.04920487486494257, 'discounted_advantage': 0.006997191019256859, 'initial_state': -0.0432286374270916, 'diff_eval': 470.28356077688966} step=152000
2025-12-06 18:03.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_152000.d3


Epoch 153/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.40it/s, vae_loss=0.0109]


2025-12-06 18:03.39 [info     ] PLASWithPerturbation_20251206172719: epoch=153 step=153000 epoch=153 metrics={'time_sample_batch': 0.001870967149734497, 'time_algorithm_update': 0.007375311136245727, 'vae_loss': 0.010859546704683453, 'time_step': 0.009474868774414063, 'td_error': 0.4596218648187464, 'value_scale': -0.04832113668289775, 'discounted_advantage': 0.0009703703167921716, 'initial_state': -0.04289860650897026, 'diff_eval': 398.49623590559213} step=153000
2025-12-06 18:03.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_153000.d3


Epoch 154/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.41it/s, vae_loss=0.011]


2025-12-06 18:03.53 [info     ] PLASWithPerturbation_20251206172719: epoch=154 step=154000 epoch=154 metrics={'time_sample_batch': 0.0019272458553314209, 'time_algorithm_update': 0.007422353744506836, 'vae_loss': 0.011018680402543396, 'time_step': 0.009584316968917846, 'td_error': 0.45911807872298116, 'value_scale': -0.04895546399517498, 'discounted_advantage': 0.005909984383953278, 'initial_state': -0.04298298805952072, 'diff_eval': 429.69100439396516} step=154000
2025-12-06 18:03.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_154000.d3


Epoch 155/200: 100%|██████████| 1000/1000 [00:09<00:00, 105.34it/s, vae_loss=0.0106]


2025-12-06 18:04.06 [info     ] PLASWithPerturbation_20251206172719: epoch=155 step=155000 epoch=155 metrics={'time_sample_batch': 0.001843313694000244, 'time_algorithm_update': 0.007263503313064575, 'vae_loss': 0.010640969798900187, 'time_step': 0.009324256896972655, 'td_error': 0.4594141768536458, 'value_scale': -0.04858427526380895, 'discounted_advantage': 0.004849313979210402, 'initial_state': -0.04235975444316864, 'diff_eval': 413.92958610879316} step=155000
2025-12-06 18:04.07 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_155000.d3


Epoch 156/200: 100%|██████████| 1000/1000 [00:09<00:00, 105.24it/s, vae_loss=0.0107]


2025-12-06 18:04.20 [info     ] PLASWithPerturbation_20251206172719: epoch=156 step=156000 epoch=156 metrics={'time_sample_batch': 0.0018429160118103027, 'time_algorithm_update': 0.007228458881378174, 'vae_loss': 0.010684031457640231, 'time_step': 0.009313228607177735, 'td_error': 0.46049351947098777, 'value_scale': -0.04749929603214211, 'discounted_advantage': -0.004635635613804375, 'initial_state': -0.04214290529489517, 'diff_eval': 417.8237719981702} step=156000
2025-12-06 18:04.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_156000.d3


Epoch 157/200: 100%|██████████| 1000/1000 [00:09<00:00, 104.35it/s, vae_loss=0.0105]


2025-12-06 18:04.34 [info     ] PLASWithPerturbation_20251206172719: epoch=157 step=157000 epoch=157 metrics={'time_sample_batch': 0.001890497922897339, 'time_algorithm_update': 0.007289314746856689, 'vae_loss': 0.01049149159202352, 'time_step': 0.009409274101257324, 'td_error': 0.4588958142106922, 'value_scale': -0.04891857989866221, 'discounted_advantage': 0.005271805035911451, 'initial_state': -0.04231487214565277, 'diff_eval': 396.7652046100237} step=157000
2025-12-06 18:04.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_157000.d3


Epoch 158/200: 100%|██████████| 1000/1000 [00:10<00:00, 99.62it/s, vae_loss=0.0105]


2025-12-06 18:04.49 [info     ] PLASWithPerturbation_20251206172719: epoch=158 step=158000 epoch=158 metrics={'time_sample_batch': 0.0020057570934295652, 'time_algorithm_update': 0.007568079471588135, 'vae_loss': 0.010492662569507956, 'time_step': 0.0098380126953125, 'td_error': 0.4581038797003089, 'value_scale': -0.04980720281374562, 'discounted_advantage': 0.011419973943561382, 'initial_state': -0.04342859238386154, 'diff_eval': 496.64514835807125} step=158000
2025-12-06 18:04.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_158000.d3


Epoch 159/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.70it/s, vae_loss=0.0103]


2025-12-06 18:05.03 [info     ] PLASWithPerturbation_20251206172719: epoch=159 step=159000 epoch=159 metrics={'time_sample_batch': 0.001959921360015869, 'time_algorithm_update': 0.007409838199615479, 'vae_loss': 0.010269466929137707, 'time_step': 0.009637218713760377, 'td_error': 0.45927974131605437, 'value_scale': -0.04863083377646006, 'discounted_advantage': 0.0034703287915604265, 'initial_state': -0.04284581169486046, 'diff_eval': 369.8459498317449} step=159000
2025-12-06 18:05.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_159000.d3


Epoch 160/200: 100%|██████████| 1000/1000 [00:10<00:00, 99.94it/s, vae_loss=0.0105] 


2025-12-06 18:05.17 [info     ] PLASWithPerturbation_20251206172719: epoch=160 step=160000 epoch=160 metrics={'time_sample_batch': 0.001982564210891724, 'time_algorithm_update': 0.007574942111968994, 'vae_loss': 0.010490157269407064, 'time_step': 0.009805324077606202, 'td_error': 0.4592927547842539, 'value_scale': -0.04874066684874397, 'discounted_advantage': 0.004591291196219745, 'initial_state': -0.042923226952552795, 'diff_eval': 382.4027939887512} step=160000
2025-12-06 18:05.17 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_160000.d3


Epoch 161/200: 100%|██████████| 1000/1000 [00:10<00:00, 98.39it/s, vae_loss=0.0102]


2025-12-06 18:05.32 [info     ] PLASWithPerturbation_20251206172719: epoch=161 step=161000 epoch=161 metrics={'time_sample_batch': 0.002055354118347168, 'time_algorithm_update': 0.007647452592849732, 'vae_loss': 0.010191906337160618, 'time_step': 0.009959000349044799, 'td_error': 0.45882628962416533, 'value_scale': -0.04907765758223051, 'discounted_advantage': 0.0073247911318842195, 'initial_state': -0.04319172725081444, 'diff_eval': 439.6790717775507} step=161000
2025-12-06 18:05.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_161000.d3


Epoch 162/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.71it/s, vae_loss=0.0101]


2025-12-06 18:05.46 [info     ] PLASWithPerturbation_20251206172719: epoch=162 step=162000 epoch=162 metrics={'time_sample_batch': 0.0019363319873809813, 'time_algorithm_update': 0.007465349197387695, 'vae_loss': 0.010128505043685437, 'time_step': 0.009640487670898437, 'td_error': 0.45922302253789626, 'value_scale': -0.048759793712424185, 'discounted_advantage': 0.003347830434124927, 'initial_state': -0.04286827892065048, 'diff_eval': 371.92260593374687} step=162000
2025-12-06 18:05.46 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_162000.d3


Epoch 163/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.57it/s, vae_loss=0.0102]


2025-12-06 18:06.00 [info     ] PLASWithPerturbation_20251206172719: epoch=163 step=163000 epoch=163 metrics={'time_sample_batch': 0.0019748735427856444, 'time_algorithm_update': 0.0074533305168151855, 'vae_loss': 0.010169334273785352, 'time_step': 0.009661667346954346, 'td_error': 0.46003639657244166, 'value_scale': -0.04800030832473198, 'discounted_advantage': -0.001760575931049413, 'initial_state': -0.042328327894210815, 'diff_eval': 377.64487172594636} step=163000
2025-12-06 18:06.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_163000.d3


Epoch 164/200: 100%|██████████| 1000/1000 [00:09<00:00, 106.00it/s, vae_loss=0.01]  


2025-12-06 18:06.14 [info     ] PLASWithPerturbation_20251206172719: epoch=164 step=164000 epoch=164 metrics={'time_sample_batch': 0.0018459994792938232, 'time_algorithm_update': 0.007149491548538208, 'vae_loss': 0.010037149423733354, 'time_step': 0.009247059345245362, 'td_error': 0.458864203454493, 'value_scale': -0.04899043906386395, 'discounted_advantage': 0.004274667241099647, 'initial_state': -0.0431106761097908, 'diff_eval': 397.71543531762364} step=164000
2025-12-06 18:06.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_164000.d3


Epoch 165/200: 100%|██████████| 1000/1000 [00:09<00:00, 104.29it/s, vae_loss=0.0102]


2025-12-06 18:06.28 [info     ] PLASWithPerturbation_20251206172719: epoch=165 step=165000 epoch=165 metrics={'time_sample_batch': 0.00186712646484375, 'time_algorithm_update': 0.0073033061027526856, 'vae_loss': 0.010144850371405483, 'time_step': 0.009403232336044311, 'td_error': 0.45814114431178865, 'value_scale': -0.049676429153219404, 'discounted_advantage': 0.010234283073145493, 'initial_state': -0.043223824352025986, 'diff_eval': 485.305284593807} step=165000
2025-12-06 18:06.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_165000.d3


Epoch 166/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.96it/s, vae_loss=0.00998]


2025-12-06 18:06.42 [info     ] PLASWithPerturbation_20251206172719: epoch=166 step=166000 epoch=166 metrics={'time_sample_batch': 0.0019218456745147706, 'time_algorithm_update': 0.007380747318267823, 'vae_loss': 0.0099779329423327, 'time_step': 0.009529629468917847, 'td_error': 0.45922340681895923, 'value_scale': -0.048725190023639474, 'discounted_advantage': 0.003826643837608982, 'initial_state': -0.042818836867809296, 'diff_eval': 364.17145658114435} step=166000
2025-12-06 18:06.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_166000.d3


Epoch 167/200: 100%|██████████| 1000/1000 [00:09<00:00, 104.28it/s, vae_loss=0.0099]


2025-12-06 18:06.56 [info     ] PLASWithPerturbation_20251206172719: epoch=167 step=167000 epoch=167 metrics={'time_sample_batch': 0.0018864455223083496, 'time_algorithm_update': 0.007278244256973267, 'vae_loss': 0.0099021684541367, 'time_step': 0.009404942989349366, 'td_error': 0.4591893017711541, 'value_scale': -0.048731253405506755, 'discounted_advantage': 0.0037574054668212545, 'initial_state': -0.04307755455374718, 'diff_eval': 360.98909977223514} step=167000
2025-12-06 18:06.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_167000.d3


Epoch 168/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.00it/s, vae_loss=0.0099]


2025-12-06 18:07.10 [info     ] PLASWithPerturbation_20251206172719: epoch=168 step=168000 epoch=168 metrics={'time_sample_batch': 0.0019380970001220703, 'time_algorithm_update': 0.007454340219497681, 'vae_loss': 0.009891612297389656, 'time_step': 0.009622404098510742, 'td_error': 0.4586342445135886, 'value_scale': -0.04931650917870926, 'discounted_advantage': 0.007670381773583478, 'initial_state': -0.043278638273477554, 'diff_eval': 410.41801211451326} step=168000
2025-12-06 18:07.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_168000.d3


Epoch 169/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.37it/s, vae_loss=0.00978]


2025-12-06 18:07.24 [info     ] PLASWithPerturbation_20251206172719: epoch=169 step=169000 epoch=169 metrics={'time_sample_batch': 0.0019410171508789063, 'time_algorithm_update': 0.0074075026512145994, 'vae_loss': 0.00979773175669834, 'time_step': 0.009585329294204713, 'td_error': 0.45843896598194517, 'value_scale': -0.04942127747292578, 'discounted_advantage': 0.008640538462198854, 'initial_state': -0.043001748621463776, 'diff_eval': 423.8342066900908} step=169000
2025-12-06 18:07.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_169000.d3


Epoch 170/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.70it/s, vae_loss=0.01]  


2025-12-06 18:07.38 [info     ] PLASWithPerturbation_20251206172719: epoch=170 step=170000 epoch=170 metrics={'time_sample_batch': 0.0019159457683563233, 'time_algorithm_update': 0.007362204790115357, 'vae_loss': 0.01001577253243886, 'time_step': 0.00954195213317871, 'td_error': 0.4590357844730414, 'value_scale': -0.04890435749246471, 'discounted_advantage': 0.005564783333985065, 'initial_state': -0.042666565626859665, 'diff_eval': 368.6645596195395} step=170000
2025-12-06 18:07.38 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_170000.d3


Epoch 171/200: 100%|██████████| 1000/1000 [00:09<00:00, 100.17it/s, vae_loss=0.00977]


2025-12-06 18:07.53 [info     ] PLASWithPerturbation_20251206172719: epoch=171 step=171000 epoch=171 metrics={'time_sample_batch': 0.001970567226409912, 'time_algorithm_update': 0.007578057050704956, 'vae_loss': 0.009784676762064919, 'time_step': 0.009795154094696045, 'td_error': 0.4591029800064141, 'value_scale': -0.048831636376119994, 'discounted_advantage': 0.00574461196177666, 'initial_state': -0.04275710880756378, 'diff_eval': 349.1464201236979} step=171000
2025-12-06 18:07.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_171000.d3


Epoch 172/200: 100%|██████████| 1000/1000 [00:09<00:00, 105.31it/s, vae_loss=0.00982]


2025-12-06 18:08.07 [info     ] PLASWithPerturbation_20251206172719: epoch=172 step=172000 epoch=172 metrics={'time_sample_batch': 0.0018130195140838622, 'time_algorithm_update': 0.0073006231784820555, 'vae_loss': 0.009822065466083586, 'time_step': 0.009322612762451173, 'td_error': 0.45950933160021584, 'value_scale': -0.048488735431713136, 'discounted_advantage': 0.001550539980497033, 'initial_state': -0.04282615706324577, 'diff_eval': 330.45593158538554} step=172000
2025-12-06 18:08.07 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_172000.d3


Epoch 173/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.61it/s, vae_loss=0.00956]


2025-12-06 18:08.21 [info     ] PLASWithPerturbation_20251206172719: epoch=173 step=173000 epoch=173 metrics={'time_sample_batch': 0.0018566317558288575, 'time_algorithm_update': 0.007484573602676392, 'vae_loss': 0.009553239965811372, 'time_step': 0.009570571422576904, 'td_error': 0.4586938126269857, 'value_scale': -0.049272903654964804, 'discounted_advantage': 0.007678251880376323, 'initial_state': -0.043007608503103256, 'diff_eval': 396.47917318120994} step=173000
2025-12-06 18:08.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_173000.d3


Epoch 174/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.20it/s, vae_loss=0.00959]


2025-12-06 18:08.35 [info     ] PLASWithPerturbation_20251206172719: epoch=174 step=174000 epoch=174 metrics={'time_sample_batch': 0.001909905195236206, 'time_algorithm_update': 0.007549989223480225, 'vae_loss': 0.009578068483853713, 'time_step': 0.00969545841217041, 'td_error': 0.4583840050533996, 'value_scale': -0.049447181630311324, 'discounted_advantage': 0.008119615313225319, 'initial_state': -0.04339399188756943, 'diff_eval': 422.30997402131123} step=174000
2025-12-06 18:08.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_174000.d3


Epoch 175/200: 100%|██████████| 1000/1000 [00:10<00:00, 93.79it/s, vae_loss=0.00963]


2025-12-06 18:08.50 [info     ] PLASWithPerturbation_20251206172719: epoch=175 step=175000 epoch=175 metrics={'time_sample_batch': 0.0021003024578094484, 'time_algorithm_update': 0.00809679651260376, 'vae_loss': 0.009625519233522936, 'time_step': 0.010442439556121827, 'td_error': 0.46056138992255086, 'value_scale': -0.04746737762425354, 'discounted_advantage': -0.005293520994663763, 'initial_state': -0.042514342814683914, 'diff_eval': 353.03445638491775} step=175000
2025-12-06 18:08.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_175000.d3


Epoch 176/200: 100%|██████████| 1000/1000 [00:09<00:00, 100.62it/s, vae_loss=0.00955]


2025-12-06 18:09.04 [info     ] PLASWithPerturbation_20251206172719: epoch=176 step=176000 epoch=176 metrics={'time_sample_batch': 0.001993948221206665, 'time_algorithm_update': 0.007515127897262573, 'vae_loss': 0.009556301427306608, 'time_step': 0.009734627723693847, 'td_error': 0.45980675863678794, 'value_scale': -0.04821958455310983, 'discounted_advantage': 0.00025354056075550894, 'initial_state': -0.04243398830294609, 'diff_eval': 356.21563826103153} step=176000
2025-12-06 18:09.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_176000.d3


Epoch 177/200: 100%|██████████| 1000/1000 [00:09<00:00, 104.88it/s, vae_loss=0.00964]


2025-12-06 18:09.18 [info     ] PLASWithPerturbation_20251206172719: epoch=177 step=177000 epoch=177 metrics={'time_sample_batch': 0.001882065773010254, 'time_algorithm_update': 0.007242893695831299, 'vae_loss': 0.009645929272752256, 'time_step': 0.009360469341278077, 'td_error': 0.458572082851672, 'value_scale': -0.04947561354353737, 'discounted_advantage': 0.009714055017222131, 'initial_state': -0.04308202862739563, 'diff_eval': 418.9238587362851} step=177000
2025-12-06 18:09.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_177000.d3


Epoch 178/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.57it/s, vae_loss=0.00939]


2025-12-06 18:09.32 [info     ] PLASWithPerturbation_20251206172719: epoch=178 step=178000 epoch=178 metrics={'time_sample_batch': 0.0018821656703948975, 'time_algorithm_update': 0.007350448608398437, 'vae_loss': 0.009382118029985577, 'time_step': 0.009462403297424317, 'td_error': 0.4589061014908401, 'value_scale': -0.04902604054573733, 'discounted_advantage': 0.006328045840905643, 'initial_state': -0.042925119400024414, 'diff_eval': 354.4995439123188} step=178000
2025-12-06 18:09.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_178000.d3


Epoch 179/200: 100%|██████████| 1000/1000 [00:10<00:00, 97.76it/s, vae_loss=0.00943] 


2025-12-06 18:09.46 [info     ] PLASWithPerturbation_20251206172719: epoch=179 step=179000 epoch=179 metrics={'time_sample_batch': 0.0018856539726257325, 'time_algorithm_update': 0.007326338052749634, 'vae_loss': 0.00942371555324644, 'time_step': 0.009449053287506103, 'td_error': 0.45930071440534104, 'value_scale': -0.04863262220152341, 'discounted_advantage': 0.0020819750844739155, 'initial_state': -0.04295450076460838, 'diff_eval': 342.554736127413} step=179000
2025-12-06 18:09.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_179000.d3


Epoch 180/200: 100%|██████████| 1000/1000 [00:09<00:00, 100.86it/s, vae_loss=0.00913]


2025-12-06 18:10.01 [info     ] PLASWithPerturbation_20251206172719: epoch=180 step=180000 epoch=180 metrics={'time_sample_batch': 0.001852694034576416, 'time_algorithm_update': 0.007629711866378784, 'vae_loss': 0.009135438934899867, 'time_step': 0.009731605291366578, 'td_error': 0.4598704282284178, 'value_scale': -0.04807878019676949, 'discounted_advantage': -0.0012227637620653871, 'initial_state': -0.042533379048109055, 'diff_eval': 332.05165251327384} step=180000
2025-12-06 18:10.01 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_180000.d3


Epoch 181/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.77it/s, vae_loss=0.0094]


2025-12-06 18:10.15 [info     ] PLASWithPerturbation_20251206172719: epoch=181 step=181000 epoch=181 metrics={'time_sample_batch': 0.0018912882804870605, 'time_algorithm_update': 0.007403270006179809, 'vae_loss': 0.009409130941610784, 'time_step': 0.009530576467514039, 'td_error': 0.4587102449165895, 'value_scale': -0.049193722880558614, 'discounted_advantage': 0.007473838736594176, 'initial_state': -0.04309287667274475, 'diff_eval': 366.46048019055894} step=181000
2025-12-06 18:10.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_181000.d3


Epoch 182/200: 100%|██████████| 1000/1000 [00:09<00:00, 104.31it/s, vae_loss=0.00921]


2025-12-06 18:10.29 [info     ] PLASWithPerturbation_20251206172719: epoch=182 step=182000 epoch=182 metrics={'time_sample_batch': 0.0018790454864501952, 'time_algorithm_update': 0.0073005475997924806, 'vae_loss': 0.009202906816964968, 'time_step': 0.009407825946807861, 'td_error': 0.4594667080728233, 'value_scale': -0.04844504077852472, 'discounted_advantage': 0.0007074219909486198, 'initial_state': -0.04311671108007431, 'diff_eval': 334.2385541779506} step=182000
2025-12-06 18:10.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_182000.d3


Epoch 183/200: 100%|██████████| 1000/1000 [00:10<00:00, 93.70it/s, vae_loss=0.0092] 


2025-12-06 18:10.44 [info     ] PLASWithPerturbation_20251206172719: epoch=183 step=183000 epoch=183 metrics={'time_sample_batch': 0.002628532886505127, 'time_algorithm_update': 0.0075979046821594235, 'vae_loss': 0.009193926530424506, 'time_step': 0.010479498863220214, 'td_error': 0.45938483112050493, 'value_scale': -0.04852901658742733, 'discounted_advantage': 0.001421554343683246, 'initial_state': -0.042936380952596664, 'diff_eval': 331.94687000196683} step=183000
2025-12-06 18:10.45 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_183000.d3


Epoch 184/200: 100%|██████████| 1000/1000 [00:09<00:00, 104.57it/s, vae_loss=0.00909]


2025-12-06 18:10.58 [info     ] PLASWithPerturbation_20251206172719: epoch=184 step=184000 epoch=184 metrics={'time_sample_batch': 0.001850466251373291, 'time_algorithm_update': 0.007305853128433228, 'vae_loss': 0.009090038569411263, 'time_step': 0.009388121366500855, 'td_error': 0.4594628587923983, 'value_scale': -0.04845975579971827, 'discounted_advantage': 0.0023255060497545197, 'initial_state': -0.04274002090096474, 'diff_eval': 309.921650345456} step=184000
2025-12-06 18:10.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_184000.d3


Epoch 185/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.97it/s, vae_loss=0.00913]


2025-12-06 18:11.13 [info     ] PLASWithPerturbation_20251206172719: epoch=185 step=185000 epoch=185 metrics={'time_sample_batch': 0.001941115140914917, 'time_algorithm_update': 0.007461697816848755, 'vae_loss': 0.009129905258771031, 'time_step': 0.009633450984954835, 'td_error': 0.45901454620234794, 'value_scale': -0.04887299061880103, 'discounted_advantage': 0.005978050474701164, 'initial_state': -0.04255314916372299, 'diff_eval': 347.8234009952892} step=185000
2025-12-06 18:11.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_185000.d3


Epoch 186/200: 100%|██████████| 1000/1000 [00:10<00:00, 99.53it/s, vae_loss=0.00924]


2025-12-06 18:11.27 [info     ] PLASWithPerturbation_20251206172719: epoch=186 step=186000 epoch=186 metrics={'time_sample_batch': 0.0019990384578704836, 'time_algorithm_update': 0.007621332168579102, 'vae_loss': 0.00923944215127267, 'time_step': 0.009857530355453491, 'td_error': 0.4586635292901402, 'value_scale': -0.049275192443425056, 'discounted_advantage': 0.00784999995495965, 'initial_state': -0.043190404772758484, 'diff_eval': 363.6881549712137} step=186000
2025-12-06 18:11.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_186000.d3


Epoch 187/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.62it/s, vae_loss=0.00898]


2025-12-06 18:11.41 [info     ] PLASWithPerturbation_20251206172719: epoch=187 step=187000 epoch=187 metrics={'time_sample_batch': 0.0019165265560150146, 'time_algorithm_update': 0.007309708595275879, 'vae_loss': 0.008978589729871601, 'time_step': 0.009458151578903198, 'td_error': 0.4589554502651183, 'value_scale': -0.048929451234491925, 'discounted_advantage': 0.005138305342384531, 'initial_state': -0.04270801693201065, 'diff_eval': 327.8958650970415} step=187000
2025-12-06 18:11.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_187000.d3


Epoch 188/200: 100%|██████████| 1000/1000 [00:09<00:00, 100.52it/s, vae_loss=0.00905]


2025-12-06 18:11.55 [info     ] PLASWithPerturbation_20251206172719: epoch=188 step=188000 epoch=188 metrics={'time_sample_batch': 0.0019833495616912843, 'time_algorithm_update': 0.007542359828948975, 'vae_loss': 0.009047562616411596, 'time_step': 0.009759819030761719, 'td_error': 0.4587072074457317, 'value_scale': -0.04929962957376257, 'discounted_advantage': 0.008312449414738054, 'initial_state': -0.04343458265066147, 'diff_eval': 372.1632033331778} step=188000
2025-12-06 18:11.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_188000.d3


Epoch 189/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.40it/s, vae_loss=0.00889]


2025-12-06 18:12.10 [info     ] PLASWithPerturbation_20251206172719: epoch=189 step=189000 epoch=189 metrics={'time_sample_batch': 0.00192396879196167, 'time_algorithm_update': 0.0074073121547698975, 'vae_loss': 0.008897894283058122, 'time_step': 0.009581809997558593, 'td_error': 0.4595841253974045, 'value_scale': -0.04843640333737894, 'discounted_advantage': 0.0022310735850294286, 'initial_state': -0.04273691028356552, 'diff_eval': 321.78931094546255} step=189000
2025-12-06 18:12.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_189000.d3


Epoch 190/200: 100%|██████████| 1000/1000 [00:09<00:00, 105.25it/s, vae_loss=0.00874]


2025-12-06 18:12.24 [info     ] PLASWithPerturbation_20251206172719: epoch=190 step=190000 epoch=190 metrics={'time_sample_batch': 0.0018413574695587158, 'time_algorithm_update': 0.007249839305877685, 'vae_loss': 0.00873913319176063, 'time_step': 0.00932487440109253, 'td_error': 0.4604170295386243, 'value_scale': -0.0476018182752999, 'discounted_advantage': -0.0038786093809386082, 'initial_state': -0.04229067638516426, 'diff_eval': 322.94885319998195} step=190000
2025-12-06 18:12.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_190000.d3


Epoch 191/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.88it/s, vae_loss=0.00868]


2025-12-06 18:12.38 [info     ] PLASWithPerturbation_20251206172719: epoch=191 step=191000 epoch=191 metrics={'time_sample_batch': 0.0018974227905273438, 'time_algorithm_update': 0.00750917911529541, 'vae_loss': 0.008693873936776071, 'time_step': 0.009639803886413573, 'td_error': 0.4584370236350296, 'value_scale': -0.04938700199923466, 'discounted_advantage': 0.008592304711757634, 'initial_state': -0.043220773339271545, 'diff_eval': 387.4523015743774} step=191000
2025-12-06 18:12.38 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_191000.d3


Epoch 192/200: 100%|██████████| 1000/1000 [00:09<00:00, 103.12it/s, vae_loss=0.00873]


2025-12-06 18:12.52 [info     ] PLASWithPerturbation_20251206172719: epoch=192 step=192000 epoch=192 metrics={'time_sample_batch': 0.00191127347946167, 'time_algorithm_update': 0.0073552122116088865, 'vae_loss': 0.008720851212507114, 'time_step': 0.009509334325790405, 'td_error': 0.4600735747506484, 'value_scale': -0.04791092958257589, 'discounted_advantage': -0.00031324747485082853, 'initial_state': -0.04223819822072983, 'diff_eval': 320.4623185757281} step=192000
2025-12-06 18:12.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_192000.d3


Epoch 193/200: 100%|██████████| 1000/1000 [00:09<00:00, 100.74it/s, vae_loss=0.00887]


2025-12-06 18:13.06 [info     ] PLASWithPerturbation_20251206172719: epoch=193 step=193000 epoch=193 metrics={'time_sample_batch': 0.0019716966152191163, 'time_algorithm_update': 0.007532781600952149, 'vae_loss': 0.008863833824405446, 'time_step': 0.009738294839859008, 'td_error': 0.4593458813066759, 'value_scale': -0.04858936448499548, 'discounted_advantage': 0.003622754555475624, 'initial_state': -0.04280417412519455, 'diff_eval': 302.00749394434644} step=193000
2025-12-06 18:13.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_193000.d3


Epoch 194/200: 100%|██████████| 1000/1000 [00:09<00:00, 101.32it/s, vae_loss=0.00857]


2025-12-06 18:13.20 [info     ] PLASWithPerturbation_20251206172719: epoch=194 step=194000 epoch=194 metrics={'time_sample_batch': 0.0019322938919067382, 'time_algorithm_update': 0.007487429618835449, 'vae_loss': 0.008554903883254155, 'time_step': 0.009675349950790404, 'td_error': 0.45858937929480514, 'value_scale': -0.04931369182097117, 'discounted_advantage': 0.0073340906249986584, 'initial_state': -0.043248873203992844, 'diff_eval': 349.02751400799104} step=194000
2025-12-06 18:13.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_194000.d3


Epoch 195/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.17it/s, vae_loss=0.00874]


2025-12-06 18:13.34 [info     ] PLASWithPerturbation_20251206172719: epoch=195 step=195000 epoch=195 metrics={'time_sample_batch': 0.0019090189933776856, 'time_algorithm_update': 0.0074596421718597416, 'vae_loss': 0.008732321658171713, 'time_step': 0.009598177909851074, 'td_error': 0.460054171659782, 'value_scale': -0.047933696456915524, 'discounted_advantage': -0.0007445546452193449, 'initial_state': -0.04259810596704483, 'diff_eval': 300.6249318592801} step=195000
2025-12-06 18:13.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_195000.d3


Epoch 196/200: 100%|██████████| 1000/1000 [00:09<00:00, 100.46it/s, vae_loss=0.0087]


2025-12-06 18:13.48 [info     ] PLASWithPerturbation_20251206172719: epoch=196 step=196000 epoch=196 metrics={'time_sample_batch': 0.0019736301898956297, 'time_algorithm_update': 0.007567811727523804, 'vae_loss': 0.008693237447412685, 'time_step': 0.009774370908737182, 'td_error': 0.4592800792712206, 'value_scale': -0.048607344291042555, 'discounted_advantage': 0.0027928023158303368, 'initial_state': -0.04286770895123482, 'diff_eval': 296.96277003114193} step=196000
2025-12-06 18:13.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_196000.d3


Epoch 197/200: 100%|██████████| 1000/1000 [00:09<00:00, 102.66it/s, vae_loss=0.00861]


2025-12-06 18:14.03 [info     ] PLASWithPerturbation_20251206172719: epoch=197 step=197000 epoch=197 metrics={'time_sample_batch': 0.0019013833999633788, 'time_algorithm_update': 0.007420088529586792, 'vae_loss': 0.008609556041425093, 'time_step': 0.009558268308639527, 'td_error': 0.4583574355910885, 'value_scale': -0.04950874732738581, 'discounted_advantage': 0.009389375638842812, 'initial_state': -0.043395452201366425, 'diff_eval': 391.81493756973566} step=197000
2025-12-06 18:14.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_197000.d3


Epoch 198/200: 100%|██████████| 1000/1000 [00:09<00:00, 100.88it/s, vae_loss=0.00838]


2025-12-06 18:14.17 [info     ] PLASWithPerturbation_20251206172719: epoch=198 step=198000 epoch=198 metrics={'time_sample_batch': 0.001970217227935791, 'time_algorithm_update': 0.007498673915863037, 'vae_loss': 0.008369531156495214, 'time_step': 0.00971175718307495, 'td_error': 0.4594887358601275, 'value_scale': -0.04838023204528176, 'discounted_advantage': 0.0003377393919136802, 'initial_state': -0.04247620701789856, 'diff_eval': 310.5447479368794} step=198000
2025-12-06 18:14.17 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_198000.d3


Epoch 199/200: 100%|██████████| 1000/1000 [00:09<00:00, 104.80it/s, vae_loss=0.00841]


2025-12-06 18:14.31 [info     ] PLASWithPerturbation_20251206172719: epoch=199 step=199000 epoch=199 metrics={'time_sample_batch': 0.0018682625293731689, 'time_algorithm_update': 0.007266947269439697, 'vae_loss': 0.008388856834266335, 'time_step': 0.009365395307540894, 'td_error': 0.45925945883160024, 'value_scale': -0.04867783954526295, 'discounted_advantage': 0.004722240323557887, 'initial_state': -0.04258188605308533, 'diff_eval': 305.6562172198901} step=199000
2025-12-06 18:14.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_199000.d3


Epoch 200/200: 100%|██████████| 1000/1000 [00:09<00:00, 105.96it/s, vae_loss=0.00852]


2025-12-06 18:14.45 [info     ] PLASWithPerturbation_20251206172719: epoch=200 step=200000 epoch=200 metrics={'time_sample_batch': 0.0018119101524353028, 'time_algorithm_update': 0.007223944902420044, 'vae_loss': 0.00851808579824865, 'time_step': 0.009257846832275391, 'td_error': 0.4587519497778938, 'value_scale': -0.04912533813522315, 'discounted_advantage': 0.005794085239475899, 'initial_state': -0.042939916253089905, 'diff_eval': 325.78746209722874} step=200000
2025-12-06 18:14.45 [info     ] Model parameters are saved to logs/d3rlpy_logs\PLASWithPerturbation_20251206172719\model_200000.d3
Training model:  TD3PlusBC
2025-12-06 18:14.45 [info     ] dataset info                   dataset_info=DatasetInfo(observation_signature=Signature(dtype=[dtype('float64')], shape=[(7,)]), action_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.CONTINUOUS: 1>, action_size=1)
2025-12-06 18:14.45

Epoch 1/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.87it/s, critic_loss=0.209, actor_loss=-2.22, bc_loss=0.115]


2025-12-06 18:15.05 [info     ] TD3PlusBC_20251206181445: epoch=1 step=1000 epoch=1 metrics={'time_sample_batch': 0.00464168906211853, 'time_algorithm_update': 0.011534927129745484, 'critic_loss': 0.21041703753173352, 'actor_loss': -2.2163899915218352, 'bc_loss': 0.11434257636219262, 'time_step': 0.016408453941345214, 'td_error': 0.7054682940239474, 'value_scale': 1.592172954289269, 'discounted_advantage': -1.6250830371562779, 'initial_state': 1.4322048425674438, 'diff_eval': 4015.743591628245} step=1000
2025-12-06 18:15.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_1000.d3


Epoch 2/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.84it/s, critic_loss=0.617, actor_loss=-2.21, bc_loss=0.0741]


2025-12-06 18:15.25 [info     ] TD3PlusBC_20251206181445: epoch=2 step=2000 epoch=2 metrics={'time_sample_batch': 0.0045263729095458985, 'time_algorithm_update': 0.011374311447143555, 'critic_loss': 0.6202613523006439, 'actor_loss': -2.205834233522415, 'bc_loss': 0.07397838999330998, 'time_step': 0.01614336895942688, 'td_error': 1.1533673050362123, 'value_scale': 2.843710372471918, 'discounted_advantage': -3.281073750533746, 'initial_state': 2.4615721702575684, 'diff_eval': 2986.78309689375} step=2000
2025-12-06 18:15.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_2000.d3


Epoch 3/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.15it/s, critic_loss=1.5, actor_loss=-2.19, bc_loss=0.0567]


2025-12-06 18:15.45 [info     ] TD3PlusBC_20251206181445: epoch=3 step=3000 epoch=3 metrics={'time_sample_batch': 0.004576773643493653, 'time_algorithm_update': 0.011505327939987183, 'critic_loss': 1.5064249534010887, 'actor_loss': -2.1948654663562777, 'bc_loss': 0.05656622710078955, 'time_step': 0.016323306798934935, 'td_error': 1.4982539520746754, 'value_scale': 3.6012477569398373, 'discounted_advantage': -3.9746584322163185, 'initial_state': 2.5989761352539062, 'diff_eval': 2431.5353841557176} step=3000
2025-12-06 18:15.45 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_3000.d3


Epoch 4/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.79it/s, critic_loss=2.69, actor_loss=-2.2, bc_loss=0.0496]


2025-12-06 18:16.05 [info     ] TD3PlusBC_20251206181445: epoch=4 step=4000 epoch=4 metrics={'time_sample_batch': 0.0047121686935424805, 'time_algorithm_update': 0.011778316259384155, 'critic_loss': 2.6998360648155213, 'actor_loss': -2.1969948496818543, 'bc_loss': 0.04966912263631821, 'time_step': 0.016720802307128906, 'td_error': 1.6907159194934953, 'value_scale': 4.446918320769487, 'discounted_advantage': -5.293335161139682, 'initial_state': 3.9904632568359375, 'diff_eval': 2175.5051954130536} step=4000
2025-12-06 18:16.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_4000.d3


Epoch 5/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.78it/s, critic_loss=4.01, actor_loss=-2.21, bc_loss=0.0473]


2025-12-06 18:16.25 [info     ] TD3PlusBC_20251206181445: epoch=5 step=5000 epoch=5 metrics={'time_sample_batch': 0.0046092491149902345, 'time_algorithm_update': 0.011572801113128662, 'critic_loss': 4.017102929830551, 'actor_loss': -2.2134848074913025, 'bc_loss': 0.047232280369848015, 'time_step': 0.01642847204208374, 'td_error': 1.8068382736068689, 'value_scale': 4.878190427225814, 'discounted_advantage': -5.101837868869453, 'initial_state': 4.005593776702881, 'diff_eval': 2036.964147362516} step=5000
2025-12-06 18:16.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_5000.d3


Epoch 6/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.23it/s, critic_loss=5.2, actor_loss=-2.22, bc_loss=0.047] 


2025-12-06 18:16.45 [info     ] TD3PlusBC_20251206181445: epoch=6 step=6000 epoch=6 metrics={'time_sample_batch': 0.004660210609436035, 'time_algorithm_update': 0.011697014331817628, 'critic_loss': 5.20509029841423, 'actor_loss': -2.221031768321991, 'bc_loss': 0.04696830651164055, 'time_step': 0.016594548463821412, 'td_error': 1.9538996548446868, 'value_scale': 5.131901448936032, 'discounted_advantage': -5.663300924759659, 'initial_state': 4.307797431945801, 'diff_eval': 1983.4145287745487} step=6000
2025-12-06 18:16.45 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_6000.d3


Epoch 7/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.74it/s, critic_loss=5.83, actor_loss=-2.24, bc_loss=0.0477]


2025-12-06 18:17.06 [info     ] TD3PlusBC_20251206181445: epoch=7 step=7000 epoch=7 metrics={'time_sample_batch': 0.004744372606277466, 'time_algorithm_update': 0.011724543809890747, 'critic_loss': 5.832723915100098, 'actor_loss': -2.2442537913322447, 'bc_loss': 0.047752566546201705, 'time_step': 0.016719289779663086, 'td_error': 1.8331395745224701, 'value_scale': 5.290159154639245, 'discounted_advantage': -4.916469495035071, 'initial_state': 5.196284294128418, 'diff_eval': 1915.5905466366735} step=7000
2025-12-06 18:17.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_7000.d3


Epoch 8/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.41it/s, critic_loss=6.02, actor_loss=-2.25, bc_loss=0.0524]


2025-12-06 18:17.26 [info     ] TD3PlusBC_20251206181445: epoch=8 step=8000 epoch=8 metrics={'time_sample_batch': 0.00462279224395752, 'time_algorithm_update': 0.011663182020187377, 'critic_loss': 6.022510985612869, 'actor_loss': -2.2487068519592284, 'bc_loss': 0.0524213986992836, 'time_step': 0.0165352566242218, 'td_error': 1.9066591963025785, 'value_scale': 5.4685513275517, 'discounted_advantage': -5.101464572105582, 'initial_state': 5.831958770751953, 'diff_eval': 2076.8833870792528} step=8000
2025-12-06 18:17.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_8000.d3


Epoch 9/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.03it/s, critic_loss=6.13, actor_loss=-2.25, bc_loss=0.056]


2025-12-06 18:17.46 [info     ] TD3PlusBC_20251206181445: epoch=9 step=9000 epoch=9 metrics={'time_sample_batch': 0.004562479257583618, 'time_algorithm_update': 0.011545572757720947, 'critic_loss': 6.134086019039154, 'actor_loss': -2.253333788394928, 'bc_loss': 0.05600678250938654, 'time_step': 0.01636303496360779, 'td_error': 2.013827435284377, 'value_scale': 5.563889662093465, 'discounted_advantage': -5.25249193477344, 'initial_state': 5.998031139373779, 'diff_eval': 1958.3864297967164} step=9000
2025-12-06 18:17.46 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_9000.d3


Epoch 10/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.21it/s, critic_loss=6.28, actor_loss=-2.26, bc_loss=0.0592]


2025-12-06 18:18.07 [info     ] TD3PlusBC_20251206181445: epoch=10 step=10000 epoch=10 metrics={'time_sample_batch': 0.0046475367546081545, 'time_algorithm_update': 0.011968517541885377, 'critic_loss': 6.279000549554825, 'actor_loss': -2.2580749459266665, 'bc_loss': 0.05927234598994255, 'time_step': 0.01686206531524658, 'td_error': 2.132896943956536, 'value_scale': 5.797315442860665, 'discounted_advantage': -5.490086142338823, 'initial_state': 6.7162370681762695, 'diff_eval': 2350.962999371649} step=10000
2025-12-06 18:18.07 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_10000.d3


Epoch 11/200: 100%|██████████| 1000/1000 [00:17<00:00, 56.40it/s, critic_loss=6.39, actor_loss=-2.26, bc_loss=0.0632]


2025-12-06 18:18.28 [info     ] TD3PlusBC_20251206181445: epoch=11 step=11000 epoch=11 metrics={'time_sample_batch': 0.004951364517211914, 'time_algorithm_update': 0.012146446943283082, 'critic_loss': 6.39045722413063, 'actor_loss': -2.2597827415466307, 'bc_loss': 0.06318331212550402, 'time_step': 0.017375009775161744, 'td_error': 2.2809597689173686, 'value_scale': 6.249124974686862, 'discounted_advantage': -5.398975681063068, 'initial_state': 7.257847785949707, 'diff_eval': 2472.876791158061} step=11000
2025-12-06 18:18.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_11000.d3


Epoch 12/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.57it/s, critic_loss=6.68, actor_loss=-2.27, bc_loss=0.0665]


2025-12-06 18:18.48 [info     ] TD3PlusBC_20251206181445: epoch=12 step=12000 epoch=12 metrics={'time_sample_batch': 0.0048874480724334715, 'time_algorithm_update': 0.011929777860641479, 'critic_loss': 6.688712349414826, 'actor_loss': -2.267475766181946, 'bc_loss': 0.06647738815099001, 'time_step': 0.01705935502052307, 'td_error': 2.482648831007941, 'value_scale': 6.653914877053503, 'discounted_advantage': -5.949284555150006, 'initial_state': 7.551959037780762, 'diff_eval': 2443.8453536822685} step=12000
2025-12-06 18:18.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_12000.d3


Epoch 13/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.42it/s, critic_loss=7.06, actor_loss=-2.27, bc_loss=0.0711]


2025-12-06 18:19.09 [info     ] TD3PlusBC_20251206181445: epoch=13 step=13000 epoch=13 metrics={'time_sample_batch': 0.004735108137130737, 'time_algorithm_update': 0.01178874683380127, 'critic_loss': 7.063935122489929, 'actor_loss': -2.272165750980377, 'bc_loss': 0.07112162590026855, 'time_step': 0.016784412622451783, 'td_error': 2.8321191801954755, 'value_scale': 7.161925452573214, 'discounted_advantage': -8.928629112893788, 'initial_state': 8.194269180297852, 'diff_eval': 4113.445090132682} step=13000
2025-12-06 18:19.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_13000.d3


Epoch 14/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.20it/s, critic_loss=7.45, actor_loss=-2.28, bc_loss=0.0766]


2025-12-06 18:19.29 [info     ] TD3PlusBC_20251206181445: epoch=14 step=14000 epoch=14 metrics={'time_sample_batch': 0.004619424343109131, 'time_algorithm_update': 0.011689392805099487, 'critic_loss': 7.459208159446717, 'actor_loss': -2.278130575180054, 'bc_loss': 0.07660935559868813, 'time_step': 0.016571898460388184, 'td_error': 3.2487541693015376, 'value_scale': 7.767887853623086, 'discounted_advantage': -8.802243139648002, 'initial_state': 7.628817081451416, 'diff_eval': 3006.7911016480753} step=14000
2025-12-06 18:19.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_14000.d3


Epoch 15/200: 100%|██████████| 1000/1000 [00:16<00:00, 58.92it/s, critic_loss=7.97, actor_loss=-2.29, bc_loss=0.0758]


2025-12-06 18:19.50 [info     ] TD3PlusBC_20251206181445: epoch=15 step=15000 epoch=15 metrics={'time_sample_batch': 0.004728528022766113, 'time_algorithm_update': 0.011681137800216675, 'critic_loss': 7.974485413789749, 'actor_loss': -2.290769844532013, 'bc_loss': 0.07574913231283427, 'time_step': 0.016653172969818116, 'td_error': 3.3863938450453226, 'value_scale': 8.96104150717377, 'discounted_advantage': -11.115171482194942, 'initial_state': 9.32800006866455, 'diff_eval': 4175.9750974727285} step=15000
2025-12-06 18:19.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_15000.d3


Epoch 16/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.79it/s, critic_loss=8.71, actor_loss=-2.3, bc_loss=0.0754]


2025-12-06 18:20.11 [info     ] TD3PlusBC_20251206181445: epoch=16 step=16000 epoch=16 metrics={'time_sample_batch': 0.004644731760025025, 'time_algorithm_update': 0.011538986921310425, 'critic_loss': 8.70689719748497, 'actor_loss': -2.300960723400116, 'bc_loss': 0.07536967227607966, 'time_step': 0.0164331796169281, 'td_error': 4.216281754114997, 'value_scale': 9.613947639262111, 'discounted_advantage': -13.9517211839162, 'initial_state': 9.12632942199707, 'diff_eval': 4073.9941958942436} step=16000
2025-12-06 18:20.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_16000.d3


Epoch 17/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.31it/s, critic_loss=9.52, actor_loss=-2.31, bc_loss=0.0754]


2025-12-06 18:20.31 [info     ] TD3PlusBC_20251206181445: epoch=17 step=17000 epoch=17 metrics={'time_sample_batch': 0.004813136339187622, 'time_algorithm_update': 0.01177095103263855, 'critic_loss': 9.520501399040223, 'actor_loss': -2.310751224040985, 'bc_loss': 0.07537058406323195, 'time_step': 0.016832434177398683, 'td_error': 4.159092687075065, 'value_scale': 10.349533321328105, 'discounted_advantage': -12.33082883584081, 'initial_state': 9.328536987304688, 'diff_eval': 3277.962409451688} step=17000
2025-12-06 18:20.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_17000.d3


Epoch 18/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.87it/s, critic_loss=10.3, actor_loss=-2.32, bc_loss=0.0735]


2025-12-06 18:20.52 [info     ] TD3PlusBC_20251206181445: epoch=18 step=18000 epoch=18 metrics={'time_sample_batch': 0.004816525936126709, 'time_algorithm_update': 0.011882204055786133, 'critic_loss': 10.306861169815063, 'actor_loss': -2.3236850628852843, 'bc_loss': 0.07348053619265556, 'time_step': 0.01696144700050354, 'td_error': 4.583176074182303, 'value_scale': 11.399346539174571, 'discounted_advantage': -14.457278604809273, 'initial_state': 10.72780990600586, 'diff_eval': 3497.9214330687773} step=18000
2025-12-06 18:20.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_18000.d3


Epoch 19/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.51it/s, critic_loss=11.2, actor_loss=-2.33, bc_loss=0.0727]


2025-12-06 18:21.12 [info     ] TD3PlusBC_20251206181445: epoch=19 step=19000 epoch=19 metrics={'time_sample_batch': 0.004723564624786377, 'time_algorithm_update': 0.0117632737159729, 'critic_loss': 11.225964694976806, 'actor_loss': -2.3318575792312624, 'bc_loss': 0.07265483509004116, 'time_step': 0.016750406980514526, 'td_error': 5.244941444442131, 'value_scale': 12.151074157076929, 'discounted_advantage': -15.056066451449995, 'initial_state': 10.3346586227417, 'diff_eval': 3970.525223817124} step=19000
2025-12-06 18:21.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_19000.d3


Epoch 20/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.04it/s, critic_loss=12.1, actor_loss=-2.34, bc_loss=0.0707]


2025-12-06 18:21.32 [info     ] TD3PlusBC_20251206181445: epoch=20 step=20000 epoch=20 metrics={'time_sample_batch': 0.004586151361465454, 'time_algorithm_update': 0.0115253427028656, 'critic_loss': 12.074999840259553, 'actor_loss': -2.3412385358810424, 'bc_loss': 0.07077177988737822, 'time_step': 0.016357234954833984, 'td_error': 5.116724389656666, 'value_scale': 13.312117054955433, 'discounted_advantage': -15.795446868887803, 'initial_state': 12.47045612335205, 'diff_eval': 3324.131721440427} step=20000
2025-12-06 18:21.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_20000.d3


Epoch 21/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.86it/s, critic_loss=13.2, actor_loss=-2.35, bc_loss=0.0712]


2025-12-06 18:21.52 [info     ] TD3PlusBC_20251206181445: epoch=21 step=21000 epoch=21 metrics={'time_sample_batch': 0.004623433113098144, 'time_algorithm_update': 0.011561506032943725, 'critic_loss': 13.19799908065796, 'actor_loss': -2.345321258068085, 'bc_loss': 0.07112106788158416, 'time_step': 0.016420869827270507, 'td_error': 5.745325404208134, 'value_scale': 14.142970576267205, 'discounted_advantage': -18.252852525325192, 'initial_state': 12.603729248046875, 'diff_eval': 3190.1540743057276} step=21000
2025-12-06 18:21.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_21000.d3


Epoch 22/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.14it/s, critic_loss=14.3, actor_loss=-2.35, bc_loss=0.0696]


2025-12-06 18:22.12 [info     ] TD3PlusBC_20251206181445: epoch=22 step=22000 epoch=22 metrics={'time_sample_batch': 0.004542804002761841, 'time_algorithm_update': 0.011551198244094849, 'critic_loss': 14.323525401592255, 'actor_loss': -2.3533354063034055, 'bc_loss': 0.06958469542860984, 'time_step': 0.01634085488319397, 'td_error': 6.79527521706887, 'value_scale': 14.958367132251944, 'discounted_advantage': -18.259671675144602, 'initial_state': 11.6896333694458, 'diff_eval': 3177.2839987338975} step=22000
2025-12-06 18:22.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_22000.d3


Epoch 23/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.30it/s, critic_loss=14.9, actor_loss=-2.36, bc_loss=0.0685]


2025-12-06 18:22.33 [info     ] TD3PlusBC_20251206181445: epoch=23 step=23000 epoch=23 metrics={'time_sample_batch': 0.004626275777816773, 'time_algorithm_update': 0.011694273471832276, 'critic_loss': 14.95099993276596, 'actor_loss': -2.3580388259887695, 'bc_loss': 0.06848079484701157, 'time_step': 0.016566993474960327, 'td_error': 6.14861163021362, 'value_scale': 15.896147758335387, 'discounted_advantage': -17.507014345520464, 'initial_state': 14.479896545410156, 'diff_eval': 2484.5190750752613} step=23000
2025-12-06 18:22.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_23000.d3


Epoch 24/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.22it/s, critic_loss=16.1, actor_loss=-2.36, bc_loss=0.0683]


2025-12-06 18:22.53 [info     ] TD3PlusBC_20251206181445: epoch=24 step=24000 epoch=24 metrics={'time_sample_batch': 0.004654810667037964, 'time_algorithm_update': 0.011699369192123414, 'critic_loss': 16.087827570915223, 'actor_loss': -2.362887451171875, 'bc_loss': 0.06829103049635887, 'time_step': 0.016593404293060304, 'td_error': 6.145069099412302, 'value_scale': 16.26981800340884, 'discounted_advantage': -18.3614406125231, 'initial_state': 14.649084091186523, 'diff_eval': 2366.6321562310004} step=24000
2025-12-06 18:22.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_24000.d3


Epoch 25/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.04it/s, critic_loss=17.2, actor_loss=-2.37, bc_loss=0.0671]


2025-12-06 18:23.13 [info     ] TD3PlusBC_20251206181445: epoch=25 step=25000 epoch=25 metrics={'time_sample_batch': 0.004668581247329712, 'time_algorithm_update': 0.011675541400909423, 'critic_loss': 17.23553784751892, 'actor_loss': -2.36958331489563, 'bc_loss': 0.0670891431644559, 'time_step': 0.016611568689346313, 'td_error': 7.323785000709456, 'value_scale': 17.779179771475288, 'discounted_advantage': -22.29285431829437, 'initial_state': 15.269641876220703, 'diff_eval': 2375.615936615987} step=25000
2025-12-06 18:23.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_25000.d3


Epoch 26/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.34it/s, critic_loss=18.8, actor_loss=-2.37, bc_loss=0.0665]


2025-12-06 18:23.34 [info     ] TD3PlusBC_20251206181445: epoch=26 step=26000 epoch=26 metrics={'time_sample_batch': 0.004721406698226929, 'time_algorithm_update': 0.011831279993057251, 'critic_loss': 18.823928427696227, 'actor_loss': -2.3731673488616942, 'bc_loss': 0.06642996766418219, 'time_step': 0.01681303572654724, 'td_error': 8.140748089515835, 'value_scale': 19.245225784435167, 'discounted_advantage': -23.110661290694402, 'initial_state': 16.34139060974121, 'diff_eval': 2977.8284452347552} step=26000
2025-12-06 18:23.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_26000.d3


Epoch 27/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.46it/s, critic_loss=20.7, actor_loss=-2.38, bc_loss=0.0663]


2025-12-06 18:23.54 [info     ] TD3PlusBC_20251206181445: epoch=27 step=27000 epoch=27 metrics={'time_sample_batch': 0.004817152023315429, 'time_algorithm_update': 0.012013215780258178, 'critic_loss': 20.70677282142639, 'actor_loss': -2.3769266290664675, 'bc_loss': 0.06631740902364254, 'time_step': 0.017085592031478882, 'td_error': 8.966393410760938, 'value_scale': 19.716414025102562, 'discounted_advantage': -24.58847071002143, 'initial_state': 15.4825439453125, 'diff_eval': 2694.9641530736344} step=27000
2025-12-06 18:23.55 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_27000.d3


Epoch 28/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.26it/s, critic_loss=22.3, actor_loss=-2.38, bc_loss=0.0655]


2025-12-06 18:24.15 [info     ] TD3PlusBC_20251206181445: epoch=28 step=28000 epoch=28 metrics={'time_sample_batch': 0.0047201838493347165, 'time_algorithm_update': 0.011621135234832764, 'critic_loss': 22.364041535377503, 'actor_loss': -2.380091323375702, 'bc_loss': 0.0653965439721942, 'time_step': 0.01658121395111084, 'td_error': 10.309906131197465, 'value_scale': 21.077737688785614, 'discounted_advantage': -30.25684701922648, 'initial_state': 16.86912727355957, 'diff_eval': 2546.108648920322} step=28000
2025-12-06 18:24.15 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_28000.d3


Epoch 29/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.78it/s, critic_loss=24.4, actor_loss=-2.38, bc_loss=0.0648]


2025-12-06 18:24.35 [info     ] TD3PlusBC_20251206181445: epoch=29 step=29000 epoch=29 metrics={'time_sample_batch': 0.004720054149627686, 'time_algorithm_update': 0.01172249412536621, 'critic_loss': 24.44275713443756, 'actor_loss': -2.384859531879425, 'bc_loss': 0.06471784515678883, 'time_step': 0.016695525646209717, 'td_error': 10.556149108326602, 'value_scale': 22.317152812032454, 'discounted_advantage': -28.60994306343166, 'initial_state': 16.98553466796875, 'diff_eval': 2331.621130871836} step=29000
2025-12-06 18:24.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_29000.d3


Epoch 30/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.78it/s, critic_loss=26.4, actor_loss=-2.39, bc_loss=0.0635]


2025-12-06 18:24.55 [info     ] TD3PlusBC_20251206181445: epoch=30 step=30000 epoch=30 metrics={'time_sample_batch': 0.004707486629486084, 'time_algorithm_update': 0.011757468700408936, 'critic_loss': 26.39375909614563, 'actor_loss': -2.38743292760849, 'bc_loss': 0.06345635293424129, 'time_step': 0.016714269161224364, 'td_error': 11.296304320803259, 'value_scale': 23.60158165616014, 'discounted_advantage': -30.588996281269534, 'initial_state': 18.230205535888672, 'diff_eval': 2984.2822349284147} step=30000
2025-12-06 18:24.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_30000.d3


Epoch 31/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.26it/s, critic_loss=28.8, actor_loss=-2.39, bc_loss=0.0634]


2025-12-06 18:25.16 [info     ] TD3PlusBC_20251206181445: epoch=31 step=31000 epoch=31 metrics={'time_sample_batch': 0.004739689111709595, 'time_algorithm_update': 0.011874197959899902, 'critic_loss': 28.814654108047485, 'actor_loss': -2.389930291175842, 'bc_loss': 0.06333546590805053, 'time_step': 0.016855897426605225, 'td_error': 12.573067885599892, 'value_scale': 25.332009614519432, 'discounted_advantage': -36.1638281788897, 'initial_state': 20.832067489624023, 'diff_eval': 2892.7109829509413} step=31000
2025-12-06 18:25.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_31000.d3


Epoch 32/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.62it/s, critic_loss=31.8, actor_loss=-2.39, bc_loss=0.0627]


2025-12-06 18:25.36 [info     ] TD3PlusBC_20251206181445: epoch=32 step=32000 epoch=32 metrics={'time_sample_batch': 0.004760468482971191, 'time_algorithm_update': 0.011745362997055054, 'critic_loss': 31.76968709564209, 'actor_loss': -2.3935422224998475, 'bc_loss': 0.06276358599960805, 'time_step': 0.016757266759872436, 'td_error': 12.475327521639938, 'value_scale': 27.302368779759618, 'discounted_advantage': -34.48832160595879, 'initial_state': 22.794845581054688, 'diff_eval': 2575.8203718122363} step=32000
2025-12-06 18:25.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_32000.d3


Epoch 33/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.29it/s, critic_loss=34.1, actor_loss=-2.4, bc_loss=0.0624]


2025-12-06 18:25.56 [info     ] TD3PlusBC_20251206181445: epoch=33 step=33000 epoch=33 metrics={'time_sample_batch': 0.004633501529693603, 'time_algorithm_update': 0.011697013139724731, 'critic_loss': 34.139086965560914, 'actor_loss': -2.3966823654174805, 'bc_loss': 0.06235051625967026, 'time_step': 0.016575536727905273, 'td_error': 16.431830307669344, 'value_scale': 28.673811114787405, 'discounted_advantage': -41.54408576243592, 'initial_state': 20.079730987548828, 'diff_eval': 2613.94094849786} step=33000
2025-12-06 18:25.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_33000.d3


Epoch 34/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.34it/s, critic_loss=37.5, actor_loss=-2.4, bc_loss=0.0614]


2025-12-06 18:26.17 [info     ] TD3PlusBC_20251206181445: epoch=34 step=34000 epoch=34 metrics={'time_sample_batch': 0.004642637729644775, 'time_algorithm_update': 0.011654922485351563, 'critic_loss': 37.600901695251466, 'actor_loss': -2.3996785945892336, 'bc_loss': 0.06135152268409729, 'time_step': 0.016541843652725218, 'td_error': 15.4518473508252, 'value_scale': 30.571338215831915, 'discounted_advantage': -38.74017161866378, 'initial_state': 22.700342178344727, 'diff_eval': 2545.885619049874} step=34000
2025-12-06 18:26.17 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_34000.d3


Epoch 35/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.84it/s, critic_loss=41, actor_loss=-2.4, bc_loss=0.0604] 


2025-12-06 18:26.36 [info     ] TD3PlusBC_20251206181445: epoch=35 step=35000 epoch=35 metrics={'time_sample_batch': 0.004518053770065308, 'time_algorithm_update': 0.01140592074394226, 'critic_loss': 41.03606543159485, 'actor_loss': -2.4031966795921327, 'bc_loss': 0.06032657656818628, 'time_step': 0.01616030740737915, 'td_error': 15.893681947586646, 'value_scale': 32.37165711959395, 'discounted_advantage': -41.076176014731466, 'initial_state': 24.635820388793945, 'diff_eval': 2334.195460219808} step=35000
2025-12-06 18:26.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_35000.d3


Epoch 36/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.70it/s, critic_loss=45.9, actor_loss=-2.41, bc_loss=0.0598]


2025-12-06 18:26.57 [info     ] TD3PlusBC_20251206181445: epoch=36 step=36000 epoch=36 metrics={'time_sample_batch': 0.004619734764099121, 'time_algorithm_update': 0.011580427408218383, 'critic_loss': 45.892504969596864, 'actor_loss': -2.4059444551467895, 'bc_loss': 0.05977734155207872, 'time_step': 0.016448217153549195, 'td_error': 19.506850032098132, 'value_scale': 35.023208596093355, 'discounted_advantage': -50.12266524392698, 'initial_state': 25.963354110717773, 'diff_eval': 2752.0112899510245} step=36000
2025-12-06 18:26.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_36000.d3


Epoch 37/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.75it/s, critic_loss=50.1, actor_loss=-2.41, bc_loss=0.0591]


2025-12-06 18:27.17 [info     ] TD3PlusBC_20251206181445: epoch=37 step=37000 epoch=37 metrics={'time_sample_batch': 0.00470150351524353, 'time_algorithm_update': 0.011783724546432495, 'critic_loss': 50.08814860916138, 'actor_loss': -2.4099297828674318, 'bc_loss': 0.05908264469355345, 'time_step': 0.016720037698745728, 'td_error': 21.164048597352675, 'value_scale': 36.182377796815295, 'discounted_advantage': -51.81288640625571, 'initial_state': 25.790800094604492, 'diff_eval': 2228.3594649927636} step=37000
2025-12-06 18:27.17 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_37000.d3


Epoch 38/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.27it/s, critic_loss=55.5, actor_loss=-2.41, bc_loss=0.0591]


2025-12-06 18:27.37 [info     ] TD3PlusBC_20251206181445: epoch=38 step=38000 epoch=38 metrics={'time_sample_batch': 0.004685277462005615, 'time_algorithm_update': 0.011642746448516846, 'critic_loss': 55.653503631591796, 'actor_loss': -2.411875443458557, 'bc_loss': 0.05911123540997505, 'time_step': 0.016571988344192504, 'td_error': 19.153994367282213, 'value_scale': 39.14987742383742, 'discounted_advantage': -49.494976418762135, 'initial_state': 29.891324996948242, 'diff_eval': 2382.482988243337} step=38000
2025-12-06 18:27.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_38000.d3


Epoch 39/200: 100%|██████████| 1000/1000 [00:17<00:00, 56.31it/s, critic_loss=59.4, actor_loss=-2.41, bc_loss=0.0583]


2025-12-06 18:27.59 [info     ] TD3PlusBC_20251206181445: epoch=39 step=39000 epoch=39 metrics={'time_sample_batch': 0.00480743956565857, 'time_algorithm_update': 0.012335325717926025, 'critic_loss': 59.49547877120972, 'actor_loss': -2.4138621854782105, 'bc_loss': 0.058338993817567825, 'time_step': 0.017415536880493163, 'td_error': 23.39191754038063, 'value_scale': 40.81866975271355, 'discounted_advantage': -59.31607696858632, 'initial_state': 30.192665100097656, 'diff_eval': 2166.461894740418} step=39000
2025-12-06 18:27.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_39000.d3


Epoch 40/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.36it/s, critic_loss=66.2, actor_loss=-2.41, bc_loss=0.0588]


2025-12-06 18:28.19 [info     ] TD3PlusBC_20251206181445: epoch=40 step=40000 epoch=40 metrics={'time_sample_batch': 0.004711006879806519, 'time_algorithm_update': 0.011839307308197022, 'critic_loss': 66.18609501457215, 'actor_loss': -2.414815977096558, 'bc_loss': 0.05878706647455692, 'time_step': 0.01681666374206543, 'td_error': 22.20337271593175, 'value_scale': 43.83030783335701, 'discounted_advantage': -58.31053435591159, 'initial_state': 32.508567810058594, 'diff_eval': 2424.89788547767} step=40000
2025-12-06 18:28.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_40000.d3


Epoch 41/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.21it/s, critic_loss=72.7, actor_loss=-2.42, bc_loss=0.0578]


2025-12-06 18:28.40 [info     ] TD3PlusBC_20251206181445: epoch=41 step=41000 epoch=41 metrics={'time_sample_batch': 0.004761515617370605, 'time_algorithm_update': 0.011827351570129395, 'critic_loss': 72.92593103027343, 'actor_loss': -2.418552296638489, 'bc_loss': 0.05782523562014103, 'time_step': 0.016846039295196534, 'td_error': 21.73886388911082, 'value_scale': 45.92011798396145, 'discounted_advantage': -58.704380037906056, 'initial_state': 36.773643493652344, 'diff_eval': 2344.844810305524} step=41000
2025-12-06 18:28.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_41000.d3


Epoch 42/200: 100%|██████████| 1000/1000 [00:16<00:00, 58.92it/s, critic_loss=78.7, actor_loss=-2.42, bc_loss=0.0576]


2025-12-06 18:29.00 [info     ] TD3PlusBC_20251206181445: epoch=42 step=42000 epoch=42 metrics={'time_sample_batch': 0.0046974380016326905, 'time_algorithm_update': 0.01173052954673767, 'critic_loss': 78.8988604927063, 'actor_loss': -2.4214472579956055, 'bc_loss': 0.05766557051986456, 'time_step': 0.016666101932525636, 'td_error': 23.99560538776673, 'value_scale': 48.62304202407249, 'discounted_advantage': -67.97510439698905, 'initial_state': 39.24930191040039, 'diff_eval': 2302.1529676655286} step=42000
2025-12-06 18:29.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_42000.d3


Epoch 43/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.65it/s, critic_loss=85.9, actor_loss=-2.42, bc_loss=0.0577]


2025-12-06 18:29.20 [info     ] TD3PlusBC_20251206181445: epoch=43 step=43000 epoch=43 metrics={'time_sample_batch': 0.004688677549362183, 'time_algorithm_update': 0.011813047885894776, 'critic_loss': 85.72184282493592, 'actor_loss': -2.422859094619751, 'bc_loss': 0.0577976010069251, 'time_step': 0.01676002860069275, 'td_error': 26.194209972051965, 'value_scale': 52.00867815919015, 'discounted_advantage': -67.64454524540827, 'initial_state': 40.83729553222656, 'diff_eval': 2575.7843206318685} step=43000
2025-12-06 18:29.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_43000.d3


Epoch 44/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.51it/s, critic_loss=92.1, actor_loss=-2.43, bc_loss=0.0565]


2025-12-06 18:29.41 [info     ] TD3PlusBC_20251206181445: epoch=44 step=44000 epoch=44 metrics={'time_sample_batch': 0.004715507507324219, 'time_algorithm_update': 0.011805886507034302, 'critic_loss': 92.32697680473328, 'actor_loss': -2.4253843870162966, 'bc_loss': 0.05651346168667078, 'time_step': 0.016774256706237792, 'td_error': 24.75694619424844, 'value_scale': 53.904834020673626, 'discounted_advantage': -69.88319843359345, 'initial_state': 42.77573776245117, 'diff_eval': 2081.693567483883} step=44000
2025-12-06 18:29.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_44000.d3


Epoch 45/200: 100%|██████████| 1000/1000 [00:19<00:00, 52.55it/s, critic_loss=106, actor_loss=-2.43, bc_loss=0.0551]


2025-12-06 18:30.03 [info     ] TD3PlusBC_20251206181445: epoch=45 step=45000 epoch=45 metrics={'time_sample_batch': 0.0057516138553619384, 'time_algorithm_update': 0.012737659454345704, 'critic_loss': 105.70025415420533, 'actor_loss': -2.4287930879592894, 'bc_loss': 0.05504799162596464, 'time_step': 0.018725508213043213, 'td_error': 30.28506999224816, 'value_scale': 57.927480147800225, 'discounted_advantage': -77.9773858747956, 'initial_state': 45.110496520996094, 'diff_eval': 2279.8761604448546} step=45000
2025-12-06 18:30.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_45000.d3


Epoch 46/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.91it/s, critic_loss=115, actor_loss=-2.43, bc_loss=0.0548]


2025-12-06 18:30.24 [info     ] TD3PlusBC_20251206181445: epoch=46 step=46000 epoch=46 metrics={'time_sample_batch': 0.004774191617965698, 'time_algorithm_update': 0.011920602798461914, 'critic_loss': 114.69193063735962, 'actor_loss': -2.430576170921326, 'bc_loss': 0.054747428894042965, 'time_step': 0.016942264795303346, 'td_error': 32.58225245973844, 'value_scale': 60.9457292280348, 'discounted_advantage': -75.98856070638946, 'initial_state': 47.128414154052734, 'diff_eval': 2208.277395257512} step=46000
2025-12-06 18:30.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_46000.d3


Epoch 47/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.53it/s, critic_loss=124, actor_loss=-2.43, bc_loss=0.055]


2025-12-06 18:30.44 [info     ] TD3PlusBC_20251206181445: epoch=47 step=47000 epoch=47 metrics={'time_sample_batch': 0.004630310535430908, 'time_algorithm_update': 0.011570060729980469, 'critic_loss': 124.03003137588502, 'actor_loss': -2.431719405174255, 'bc_loss': 0.05496605157107115, 'time_step': 0.016466334104537964, 'td_error': 32.658528612845366, 'value_scale': 63.4533515055941, 'discounted_advantage': -80.44918815757376, 'initial_state': 50.221065521240234, 'diff_eval': 2120.7200145315796} step=47000
2025-12-06 18:30.44 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_47000.d3


Epoch 48/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.29it/s, critic_loss=138, actor_loss=-2.43, bc_loss=0.0541]


2025-12-06 18:31.04 [info     ] TD3PlusBC_20251206181445: epoch=48 step=48000 epoch=48 metrics={'time_sample_batch': 0.004494415998458862, 'time_algorithm_update': 0.011789329767227174, 'critic_loss': 137.86229048156738, 'actor_loss': -2.433209599018097, 'bc_loss': 0.05417102081328631, 'time_step': 0.0165466685295105, 'td_error': 36.035279512697606, 'value_scale': 66.09219067994027, 'discounted_advantage': -89.66098237252679, 'initial_state': 52.34236145019531, 'diff_eval': 2378.3776861255833} step=48000
2025-12-06 18:31.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_48000.d3


Epoch 49/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.67it/s, critic_loss=147, actor_loss=-2.43, bc_loss=0.055]


2025-12-06 18:31.25 [info     ] TD3PlusBC_20251206181445: epoch=49 step=49000 epoch=49 metrics={'time_sample_batch': 0.00467071533203125, 'time_algorithm_update': 0.011781086206436158, 'critic_loss': 147.67929718017578, 'actor_loss': -2.4339718508720396, 'bc_loss': 0.055062499821186064, 'time_step': 0.016718068838119507, 'td_error': 34.815842117743834, 'value_scale': 69.65667415931759, 'discounted_advantage': -87.3004502818951, 'initial_state': 56.04957962036133, 'diff_eval': 2018.9378916883763} step=49000
2025-12-06 18:31.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_49000.d3


Epoch 50/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.80it/s, critic_loss=163, actor_loss=-2.44, bc_loss=0.0532]


2025-12-06 18:31.46 [info     ] TD3PlusBC_20251206181445: epoch=50 step=50000 epoch=50 metrics={'time_sample_batch': 0.00477663516998291, 'time_algorithm_update': 0.0119676992893219, 'critic_loss': 163.27269702148436, 'actor_loss': -2.436699293613434, 'bc_loss': 0.05330121451616287, 'time_step': 0.016984911680221558, 'td_error': 39.19650596845473, 'value_scale': 71.78240084428299, 'discounted_advantage': -95.08791008173789, 'initial_state': 55.67835998535156, 'diff_eval': 2131.7082566297936} step=50000
2025-12-06 18:31.46 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_50000.d3


Epoch 51/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.19it/s, critic_loss=177, actor_loss=-2.44, bc_loss=0.0528]


2025-12-06 18:32.06 [info     ] TD3PlusBC_20251206181445: epoch=51 step=51000 epoch=51 metrics={'time_sample_batch': 0.00463554859161377, 'time_algorithm_update': 0.011714895248413086, 'critic_loss': 177.13180765533448, 'actor_loss': -2.4389175262451173, 'bc_loss': 0.05283685664832592, 'time_step': 0.016596961736679076, 'td_error': 43.883585712328944, 'value_scale': 76.89129655386657, 'discounted_advantage': -107.59405993583533, 'initial_state': 62.811805725097656, 'diff_eval': 2674.1001118956183} step=51000
2025-12-06 18:32.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_51000.d3


Epoch 52/200: 100%|██████████| 1000/1000 [00:17<00:00, 56.03it/s, critic_loss=195, actor_loss=-2.44, bc_loss=0.0536]


2025-12-06 18:32.27 [info     ] TD3PlusBC_20251206181445: epoch=52 step=52000 epoch=52 metrics={'time_sample_batch': 0.00489015531539917, 'time_algorithm_update': 0.012320738792419433, 'critic_loss': 195.88326917266846, 'actor_loss': -2.438648666381836, 'bc_loss': 0.05358172442764044, 'time_step': 0.017492106676101685, 'td_error': 42.20954554614762, 'value_scale': 78.9913272230893, 'discounted_advantage': -105.38671429354193, 'initial_state': 62.82319259643555, 'diff_eval': 2822.7141961140974} step=52000
2025-12-06 18:32.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_52000.d3


Epoch 53/200: 100%|██████████| 1000/1000 [00:18<00:00, 55.28it/s, critic_loss=204, actor_loss=-2.44, bc_loss=0.0538]


2025-12-06 18:32.49 [info     ] TD3PlusBC_20251206181445: epoch=53 step=53000 epoch=53 metrics={'time_sample_batch': 0.005146566152572632, 'time_algorithm_update': 0.012303272247314452, 'critic_loss': 204.49714316940307, 'actor_loss': -2.4394842610359193, 'bc_loss': 0.05377097452431917, 'time_step': 0.017731157302856446, 'td_error': 47.13737059953326, 'value_scale': 83.21592288270081, 'discounted_advantage': -115.43243736688886, 'initial_state': 67.83818054199219, 'diff_eval': 2668.0663676841546} step=53000
2025-12-06 18:32.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_53000.d3


Epoch 54/200: 100%|██████████| 1000/1000 [00:18<00:00, 53.96it/s, critic_loss=222, actor_loss=-2.44, bc_loss=0.053]


2025-12-06 18:33.11 [info     ] TD3PlusBC_20251206181445: epoch=54 step=54000 epoch=54 metrics={'time_sample_batch': 0.00514710259437561, 'time_algorithm_update': 0.012717328786849976, 'critic_loss': 221.52787870788575, 'actor_loss': -2.4406038088798523, 'bc_loss': 0.053083446852862835, 'time_step': 0.018156224727630614, 'td_error': 44.42691161933429, 'value_scale': 86.90733580037714, 'discounted_advantage': -107.55433375667677, 'initial_state': 70.03076934814453, 'diff_eval': 2217.350677754675} step=54000
2025-12-06 18:33.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_54000.d3


Epoch 55/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.25it/s, critic_loss=244, actor_loss=-2.44, bc_loss=0.0525]


2025-12-06 18:33.31 [info     ] TD3PlusBC_20251206181445: epoch=55 step=55000 epoch=55 metrics={'time_sample_batch': 0.0046168437004089355, 'time_algorithm_update': 0.01168007755279541, 'critic_loss': 244.0304860305786, 'actor_loss': -2.442167125225067, 'bc_loss': 0.05247901348024607, 'time_step': 0.01655487608909607, 'td_error': 53.251042506549894, 'value_scale': 89.60114477985762, 'discounted_advantage': -119.98550211054896, 'initial_state': 69.43034362792969, 'diff_eval': 2336.5278240129055} step=55000
2025-12-06 18:33.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_55000.d3


Epoch 56/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.64it/s, critic_loss=263, actor_loss=-2.44, bc_loss=0.0518]


2025-12-06 18:33.51 [info     ] TD3PlusBC_20251206181445: epoch=56 step=56000 epoch=56 metrics={'time_sample_batch': 0.004617557048797607, 'time_algorithm_update': 0.011616802215576172, 'critic_loss': 263.1706113891602, 'actor_loss': -2.4433967332839965, 'bc_loss': 0.051824619293212894, 'time_step': 0.016478751182556154, 'td_error': 47.067659593988104, 'value_scale': 92.97184654892939, 'discounted_advantage': -117.25503011011318, 'initial_state': 74.19541931152344, 'diff_eval': 1927.607967304628} step=56000
2025-12-06 18:33.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_56000.d3


Epoch 57/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.54it/s, critic_loss=285, actor_loss=-2.44, bc_loss=0.0508]


2025-12-06 18:34.12 [info     ] TD3PlusBC_20251206181445: epoch=57 step=57000 epoch=57 metrics={'time_sample_batch': 0.004722094058990479, 'time_algorithm_update': 0.011806168079376221, 'critic_loss': 284.5603082885742, 'actor_loss': -2.4446702942848204, 'bc_loss': 0.050791006699204444, 'time_step': 0.016772294521331787, 'td_error': 54.27279519719429, 'value_scale': 97.46419357993676, 'discounted_advantage': -120.67870739941864, 'initial_state': 76.13587188720703, 'diff_eval': 2216.5538649197056} step=57000
2025-12-06 18:34.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_57000.d3


Epoch 58/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.48it/s, critic_loss=301, actor_loss=-2.45, bc_loss=0.0512]


2025-12-06 18:34.32 [info     ] TD3PlusBC_20251206181445: epoch=58 step=58000 epoch=58 metrics={'time_sample_batch': 0.004701661586761475, 'time_algorithm_update': 0.011844964742660523, 'critic_loss': 301.98808785247803, 'actor_loss': -2.445144907951355, 'bc_loss': 0.05117744615674019, 'time_step': 0.016795684099197388, 'td_error': 59.79325515507878, 'value_scale': 100.04986523782517, 'discounted_advantage': -127.85468192870398, 'initial_state': 78.97652435302734, 'diff_eval': 2634.624530840724} step=58000
2025-12-06 18:34.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_58000.d3


Epoch 59/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.22it/s, critic_loss=322, actor_loss=-2.45, bc_loss=0.0512]


2025-12-06 18:34.53 [info     ] TD3PlusBC_20251206181445: epoch=59 step=59000 epoch=59 metrics={'time_sample_batch': 0.004756796360015869, 'time_algorithm_update': 0.011840919017791747, 'critic_loss': 322.44959358215334, 'actor_loss': -2.4456344680786133, 'bc_loss': 0.05118491496145725, 'time_step': 0.016848413705825807, 'td_error': 56.062337082773354, 'value_scale': 103.28192490723987, 'discounted_advantage': -137.29029985906206, 'initial_state': 83.5860366821289, 'diff_eval': 2153.0217152290415} step=59000
2025-12-06 18:34.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_59000.d3


Epoch 60/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.71it/s, critic_loss=340, actor_loss=-2.45, bc_loss=0.0514]


2025-12-06 18:35.13 [info     ] TD3PlusBC_20251206181445: epoch=60 step=60000 epoch=60 metrics={'time_sample_batch': 0.004584931135177613, 'time_algorithm_update': 0.011616241455078126, 'critic_loss': 339.6294062423706, 'actor_loss': -2.44591695022583, 'bc_loss': 0.05139557282626629, 'time_step': 0.016446227788925172, 'td_error': 65.4630922875173, 'value_scale': 108.3141392502381, 'discounted_advantage': -147.11566142164725, 'initial_state': 88.29439544677734, 'diff_eval': 2021.9451464563292} step=60000
2025-12-06 18:35.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_60000.d3


Epoch 61/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.72it/s, critic_loss=371, actor_loss=-2.45, bc_loss=0.0507]


2025-12-06 18:35.34 [info     ] TD3PlusBC_20251206181445: epoch=61 step=61000 epoch=61 metrics={'time_sample_batch': 0.004637193441390991, 'time_algorithm_update': 0.011815432071685792, 'critic_loss': 370.60721907806396, 'actor_loss': -2.4470385313034058, 'bc_loss': 0.05074055926501751, 'time_step': 0.01671598196029663, 'td_error': 82.29377124527039, 'value_scale': 111.67703322772708, 'discounted_advantage': -156.09823657344907, 'initial_state': 86.22244262695312, 'diff_eval': 1862.4606464684227} step=61000
2025-12-06 18:35.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_61000.d3


Epoch 62/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.78it/s, critic_loss=393, actor_loss=-2.45, bc_loss=0.0501]


2025-12-06 18:35.54 [info     ] TD3PlusBC_20251206181445: epoch=62 step=62000 epoch=62 metrics={'time_sample_batch': 0.004652394533157349, 'time_algorithm_update': 0.011781410932540894, 'critic_loss': 393.36158010864256, 'actor_loss': -2.4480161762237547, 'bc_loss': 0.05004131709039211, 'time_step': 0.0166818528175354, 'td_error': 67.41666994062649, 'value_scale': 115.03237551235893, 'discounted_advantage': -148.2420884871975, 'initial_state': 90.26248168945312, 'diff_eval': 2070.458856722796} step=62000
2025-12-06 18:35.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_62000.d3


Epoch 63/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.68it/s, critic_loss=424, actor_loss=-2.45, bc_loss=0.0498]


2025-12-06 18:36.14 [info     ] TD3PlusBC_20251206181445: epoch=63 step=63000 epoch=63 metrics={'time_sample_batch': 0.004653170585632325, 'time_algorithm_update': 0.011843074083328247, 'critic_loss': 424.17266855621335, 'actor_loss': -2.448684810161591, 'bc_loss': 0.049802754282951354, 'time_step': 0.016736598014831544, 'td_error': 73.56966677475882, 'value_scale': 119.99263857995774, 'discounted_advantage': -164.70862916313664, 'initial_state': 97.08568572998047, 'diff_eval': 2109.653143416919} step=63000
2025-12-06 18:36.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_63000.d3


Epoch 64/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.14it/s, critic_loss=456, actor_loss=-2.45, bc_loss=0.0493]


2025-12-06 18:36.35 [info     ] TD3PlusBC_20251206181445: epoch=64 step=64000 epoch=64 metrics={'time_sample_batch': 0.004643494606018066, 'time_algorithm_update': 0.011744450092315674, 'critic_loss': 455.90209365081785, 'actor_loss': -2.44928032875061, 'bc_loss': 0.04932654020935297, 'time_step': 0.01662406086921692, 'td_error': 68.68495393534718, 'value_scale': 124.60509061773385, 'discounted_advantage': -160.46319975131095, 'initial_state': 101.36154174804688, 'diff_eval': 1964.4224012300547} step=64000
2025-12-06 18:36.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_64000.d3


Epoch 65/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.56it/s, critic_loss=496, actor_loss=-2.45, bc_loss=0.0494]


2025-12-06 18:36.55 [info     ] TD3PlusBC_20251206181445: epoch=65 step=65000 epoch=65 metrics={'time_sample_batch': 0.004571051359176636, 'time_algorithm_update': 0.011673132658004761, 'critic_loss': 496.2870478210449, 'actor_loss': -2.4497070479393006, 'bc_loss': 0.049349055968225, 'time_step': 0.01650162148475647, 'td_error': 78.51753614965072, 'value_scale': 128.3576403503546, 'discounted_advantage': -167.8541756757863, 'initial_state': 101.68875122070312, 'diff_eval': 2200.6867745701497} step=65000
2025-12-06 18:36.55 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_65000.d3


Epoch 66/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.10it/s, critic_loss=539, actor_loss=-2.45, bc_loss=0.0502]


2025-12-06 18:37.16 [info     ] TD3PlusBC_20251206181445: epoch=66 step=66000 epoch=66 metrics={'time_sample_batch': 0.004760133504867554, 'time_algorithm_update': 0.011931772470474243, 'critic_loss': 538.8241678771973, 'actor_loss': -2.4492157521247866, 'bc_loss': 0.05014420875906944, 'time_step': 0.016923116445541384, 'td_error': 79.17080582685354, 'value_scale': 133.53502919627954, 'discounted_advantage': -166.16394908108646, 'initial_state': 108.05560302734375, 'diff_eval': 2018.2601558523095} step=66000
2025-12-06 18:37.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_66000.d3


Epoch 67/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.07it/s, critic_loss=593, actor_loss=-2.45, bc_loss=0.0501]


2025-12-06 18:37.36 [info     ] TD3PlusBC_20251206181445: epoch=67 step=67000 epoch=67 metrics={'time_sample_batch': 0.004743523359298706, 'time_algorithm_update': 0.011628971815109253, 'critic_loss': 591.9744215850831, 'actor_loss': -2.449385368824005, 'bc_loss': 0.05015967109054327, 'time_step': 0.01662495160102844, 'td_error': 87.54141512591094, 'value_scale': 138.9762069625407, 'discounted_advantage': -172.2045934681171, 'initial_state': 109.94058227539062, 'diff_eval': 1950.4870180705334} step=67000
2025-12-06 18:37.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_67000.d3


Epoch 68/200: 100%|██████████| 1000/1000 [00:17<00:00, 55.75it/s, critic_loss=633, actor_loss=-2.45, bc_loss=0.049]


2025-12-06 18:37.57 [info     ] TD3PlusBC_20251206181445: epoch=68 step=68000 epoch=68 metrics={'time_sample_batch': 0.004770614624023437, 'time_algorithm_update': 0.012571264982223511, 'critic_loss': 634.5348799896241, 'actor_loss': -2.4505744376182554, 'bc_loss': 0.049002765096724035, 'time_step': 0.017597667694091795, 'td_error': 90.47278352135307, 'value_scale': 142.41078270770677, 'discounted_advantage': -188.58510203959574, 'initial_state': 113.56554412841797, 'diff_eval': 2045.3681781208704} step=68000
2025-12-06 18:37.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_68000.d3


Epoch 69/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.46it/s, critic_loss=688, actor_loss=-2.45, bc_loss=0.0492]


2025-12-06 18:38.17 [info     ] TD3PlusBC_20251206181445: epoch=69 step=69000 epoch=69 metrics={'time_sample_batch': 0.00459932804107666, 'time_algorithm_update': 0.01166117763519287, 'critic_loss': 689.0713444366455, 'actor_loss': -2.450468110084534, 'bc_loss': 0.049199417434632776, 'time_step': 0.01650961399078369, 'td_error': 96.71810742180361, 'value_scale': 149.9859386309794, 'discounted_advantage': -188.33051480323238, 'initial_state': 121.66117858886719, 'diff_eval': 1833.7417948284321} step=69000
2025-12-06 18:38.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_69000.d3


Epoch 70/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.29it/s, critic_loss=751, actor_loss=-2.45, bc_loss=0.0483]


2025-12-06 18:38.38 [info     ] TD3PlusBC_20251206181445: epoch=70 step=70000 epoch=70 metrics={'time_sample_batch': 0.004676118612289428, 'time_algorithm_update': 0.01161484169960022, 'critic_loss': 752.2005340881348, 'actor_loss': -2.4512953686714174, 'bc_loss': 0.048362602815032005, 'time_step': 0.016559969186782836, 'td_error': 113.76323962165404, 'value_scale': 154.32709130497412, 'discounted_advantage': -210.65911118434994, 'initial_state': 120.35095977783203, 'diff_eval': 1611.4382302442355} step=70000
2025-12-06 18:38.38 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_70000.d3


Epoch 71/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.14it/s, critic_loss=844, actor_loss=-2.45, bc_loss=0.0488] 


2025-12-06 18:38.58 [info     ] TD3PlusBC_20251206181445: epoch=71 step=71000 epoch=71 metrics={'time_sample_batch': 0.00472852635383606, 'time_algorithm_update': 0.011906336307525635, 'critic_loss': 844.2926419143677, 'actor_loss': -2.4509988799095153, 'bc_loss': 0.04870900971442461, 'time_step': 0.016895228147506713, 'td_error': 127.53275512261207, 'value_scale': 163.59232905564545, 'discounted_advantage': -222.62995846620836, 'initial_state': 128.12738037109375, 'diff_eval': 2085.0824562121948} step=71000
2025-12-06 18:38.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_71000.d3


Epoch 72/200: 100%|██████████| 1000/1000 [00:17<00:00, 56.52it/s, critic_loss=905, actor_loss=-2.45, bc_loss=0.0483]


2025-12-06 18:39.19 [info     ] TD3PlusBC_20251206181445: epoch=72 step=72000 epoch=72 metrics={'time_sample_batch': 0.004805989503860473, 'time_algorithm_update': 0.012277901887893676, 'critic_loss': 907.0954975891113, 'actor_loss': -2.451377296924591, 'bc_loss': 0.04832757016271353, 'time_step': 0.01735381031036377, 'td_error': 127.56062097448385, 'value_scale': 170.39917600364717, 'discounted_advantage': -225.28009063988912, 'initial_state': 133.87408447265625, 'diff_eval': 1741.9469089142451} step=72000
2025-12-06 18:39.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_72000.d3


Epoch 73/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.06it/s, critic_loss=1e+3, actor_loss=-2.45, bc_loss=0.0478]


2025-12-06 18:39.40 [info     ] TD3PlusBC_20251206181445: epoch=73 step=73000 epoch=73 metrics={'time_sample_batch': 0.004765487670898437, 'time_algorithm_update': 0.0119039568901062, 'critic_loss': 1006.6653550872803, 'actor_loss': -2.451806670188904, 'bc_loss': 0.04782681243866682, 'time_step': 0.01691327714920044, 'td_error': 157.7029115671231, 'value_scale': 179.78846569812708, 'discounted_advantage': -240.45152025960394, 'initial_state': 139.0937957763672, 'diff_eval': 2128.5350096329335} step=73000
2025-12-06 18:39.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_73000.d3


Epoch 74/200: 100%|██████████| 1000/1000 [00:18<00:00, 55.08it/s, critic_loss=1.12e+3, actor_loss=-2.45, bc_loss=0.0466]


2025-12-06 18:40.02 [info     ] TD3PlusBC_20251206181445: epoch=74 step=74000 epoch=74 metrics={'time_sample_batch': 0.004806611776351929, 'time_algorithm_update': 0.012758492946624756, 'critic_loss': 1119.3787163391114, 'actor_loss': -2.453012752532959, 'bc_loss': 0.04659968876093626, 'time_step': 0.01782157874107361, 'td_error': 162.5384141816772, 'value_scale': 186.20908407513474, 'discounted_advantage': -243.50466121785414, 'initial_state': 143.38751220703125, 'diff_eval': 1554.3150508891324} step=74000
2025-12-06 18:40.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_74000.d3


Epoch 75/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.11it/s, critic_loss=1.23e+3, actor_loss=-2.45, bc_loss=0.0471]


2025-12-06 18:40.23 [info     ] TD3PlusBC_20251206181445: epoch=75 step=75000 epoch=75 metrics={'time_sample_batch': 0.004758676528930664, 'time_algorithm_update': 0.011897845029830932, 'critic_loss': 1228.631688949585, 'actor_loss': -2.4524133086204527, 'bc_loss': 0.04712129902839661, 'time_step': 0.016903451919555664, 'td_error': 202.40431160032878, 'value_scale': 194.45058862601422, 'discounted_advantage': -268.4548713362455, 'initial_state': 150.23829650878906, 'diff_eval': 2129.101479533951} step=75000
2025-12-06 18:40.23 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_75000.d3


Epoch 76/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.74it/s, critic_loss=1.35e+3, actor_loss=-2.45, bc_loss=0.0473]


2025-12-06 18:40.43 [info     ] TD3PlusBC_20251206181445: epoch=76 step=76000 epoch=76 metrics={'time_sample_batch': 0.004789082288742065, 'time_algorithm_update': 0.011980806589126586, 'critic_loss': 1346.5868596343994, 'actor_loss': -2.4523013982772826, 'bc_loss': 0.047250993117690085, 'time_step': 0.01701828408241272, 'td_error': 190.61792485765878, 'value_scale': 200.93687100022757, 'discounted_advantage': -257.98884512913907, 'initial_state': 156.2855224609375, 'diff_eval': 1798.380970199007} step=76000
2025-12-06 18:40.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_76000.d3


Epoch 77/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.58it/s, critic_loss=1.49e+3, actor_loss=-2.45, bc_loss=0.0467]


2025-12-06 18:41.04 [info     ] TD3PlusBC_20251206181445: epoch=77 step=77000 epoch=77 metrics={'time_sample_batch': 0.004641724586486816, 'time_algorithm_update': 0.011885879516601562, 'critic_loss': 1493.9076887207032, 'actor_loss': -2.4529050059318545, 'bc_loss': 0.04669793552160263, 'time_step': 0.016769367694854737, 'td_error': 192.87090897360332, 'value_scale': 210.62264964126882, 'discounted_advantage': -276.2079671108182, 'initial_state': 165.37612915039062, 'diff_eval': 1562.484832716749} step=77000
2025-12-06 18:41.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_77000.d3


Epoch 78/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.43it/s, critic_loss=1.67e+3, actor_loss=-2.45, bc_loss=0.0466]


2025-12-06 18:41.25 [info     ] TD3PlusBC_20251206181445: epoch=78 step=78000 epoch=78 metrics={'time_sample_batch': 0.00475381064414978, 'time_algorithm_update': 0.012096473455429076, 'critic_loss': 1666.4176537475587, 'actor_loss': -2.453019857406616, 'bc_loss': 0.046614026471972464, 'time_step': 0.017096566438674927, 'td_error': 238.47912364903496, 'value_scale': 220.47866241889946, 'discounted_advantage': -302.5303509624677, 'initial_state': 172.03285217285156, 'diff_eval': 2071.69763169656} step=78000
2025-12-06 18:41.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_78000.d3


Epoch 79/200: 100%|██████████| 1000/1000 [00:18<00:00, 53.35it/s, critic_loss=1.81e+3, actor_loss=-2.45, bc_loss=0.0468]


2025-12-06 18:41.47 [info     ] TD3PlusBC_20251206181445: epoch=79 step=79000 epoch=79 metrics={'time_sample_batch': 0.005034834861755371, 'time_algorithm_update': 0.013117751121520997, 'critic_loss': 1806.8499338684082, 'actor_loss': -2.4528725843429564, 'bc_loss': 0.046811323538422586, 'time_step': 0.01841302490234375, 'td_error': 249.20425490248522, 'value_scale': 231.08554783490956, 'discounted_advantage': -315.442533183322, 'initial_state': 180.06626892089844, 'diff_eval': 1722.0940250759825} step=79000
2025-12-06 18:41.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_79000.d3


Epoch 80/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.04it/s, critic_loss=1.98e+3, actor_loss=-2.45, bc_loss=0.0458]


2025-12-06 18:42.07 [info     ] TD3PlusBC_20251206181445: epoch=80 step=80000 epoch=80 metrics={'time_sample_batch': 0.00478117847442627, 'time_algorithm_update': 0.011861279010772705, 'critic_loss': 1984.5944616394042, 'actor_loss': -2.4536769642829896, 'bc_loss': 0.04584111548215151, 'time_step': 0.016901918172836303, 'td_error': 279.3029609204223, 'value_scale': 239.8429223513863, 'discounted_advantage': -334.18692123324007, 'initial_state': 189.1202392578125, 'diff_eval': 1719.3867726184465} step=80000
2025-12-06 18:42.07 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_80000.d3


Epoch 81/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.44it/s, critic_loss=2.21e+3, actor_loss=-2.45, bc_loss=0.046]


2025-12-06 18:42.28 [info     ] TD3PlusBC_20251206181445: epoch=81 step=81000 epoch=81 metrics={'time_sample_batch': 0.004841265201568604, 'time_algorithm_update': 0.011989293813705445, 'critic_loss': 2210.90222076416, 'actor_loss': -2.4536506123542785, 'bc_loss': 0.046031060688197616, 'time_step': 0.01708521509170532, 'td_error': 292.08611534654904, 'value_scale': 248.6807574378475, 'discounted_advantage': -332.08995885600945, 'initial_state': 195.30502319335938, 'diff_eval': 1838.4971503036675} step=81000
2025-12-06 18:42.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_81000.d3


Epoch 82/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.86it/s, critic_loss=2.4e+3, actor_loss=-2.45, bc_loss=0.0459]


2025-12-06 18:42.49 [info     ] TD3PlusBC_20251206181445: epoch=82 step=82000 epoch=82 metrics={'time_sample_batch': 0.004841809749603272, 'time_algorithm_update': 0.011878283977508544, 'critic_loss': 2406.965178192139, 'actor_loss': -2.453763144016266, 'bc_loss': 0.045919014617800714, 'time_step': 0.016966877937316896, 'td_error': 302.1754633019025, 'value_scale': 259.7477469528211, 'discounted_advantage': -330.44020788913934, 'initial_state': 204.09112548828125, 'diff_eval': 1788.9290682569228} step=82000
2025-12-06 18:42.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_82000.d3


Epoch 83/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.30it/s, critic_loss=2.66e+3, actor_loss=-2.45, bc_loss=0.0463]


2025-12-06 18:43.09 [info     ] TD3PlusBC_20251206181445: epoch=83 step=83000 epoch=83 metrics={'time_sample_batch': 0.0046686031818389895, 'time_algorithm_update': 0.01191811728477478, 'critic_loss': 2662.5970712280273, 'actor_loss': -2.453389543056488, 'bc_loss': 0.046237321071326734, 'time_step': 0.016832254648208617, 'td_error': 358.9884668951242, 'value_scale': 271.6111675559825, 'discounted_advantage': -376.0992524290255, 'initial_state': 211.00320434570312, 'diff_eval': 1938.7825879084148} step=83000
2025-12-06 18:43.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_83000.d3


Epoch 84/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.48it/s, critic_loss=2.84e+3, actor_loss=-2.45, bc_loss=0.0454]


2025-12-06 18:43.30 [info     ] TD3PlusBC_20251206181445: epoch=84 step=84000 epoch=84 metrics={'time_sample_batch': 0.0046007981300354005, 'time_algorithm_update': 0.01168236780166626, 'critic_loss': 2835.382513458252, 'actor_loss': -2.454330312728882, 'bc_loss': 0.04531670980900526, 'time_step': 0.016518978834152223, 'td_error': 347.678165227883, 'value_scale': 284.6785379491328, 'discounted_advantage': -368.5536466440564, 'initial_state': 220.91683959960938, 'diff_eval': 1674.151597750646} step=84000
2025-12-06 18:43.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_84000.d3


Epoch 85/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.56it/s, critic_loss=3.17e+3, actor_loss=-2.45, bc_loss=0.0461]


2025-12-06 18:43.50 [info     ] TD3PlusBC_20251206181445: epoch=85 step=85000 epoch=85 metrics={'time_sample_batch': 0.004770202875137329, 'time_algorithm_update': 0.012015286207199096, 'critic_loss': 3168.0729646606446, 'actor_loss': -2.453489911556244, 'bc_loss': 0.046103793725371364, 'time_step': 0.017042446851730345, 'td_error': 433.4956402713121, 'value_scale': 296.3122802056491, 'discounted_advantage': -418.57209692339256, 'initial_state': 233.2222137451172, 'diff_eval': 1658.6583784573395} step=85000
2025-12-06 18:43.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_85000.d3


Epoch 86/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.05it/s, critic_loss=3.47e+3, actor_loss=-2.45, bc_loss=0.0456]


2025-12-06 18:44.10 [info     ] TD3PlusBC_20251206181445: epoch=86 step=86000 epoch=86 metrics={'time_sample_batch': 0.004511826038360596, 'time_algorithm_update': 0.011596919536590576, 'critic_loss': 3478.6095927124024, 'actor_loss': -2.4540185613632204, 'bc_loss': 0.045566114917397496, 'time_step': 0.016352302312850953, 'td_error': 426.1768509814457, 'value_scale': 305.64901675842236, 'discounted_advantage': -415.64791997395145, 'initial_state': 234.5544891357422, 'diff_eval': 1598.7385558481267} step=86000
2025-12-06 18:44.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_86000.d3


Epoch 87/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.20it/s, critic_loss=3.81e+3, actor_loss=-2.45, bc_loss=0.0453]


2025-12-06 18:44.31 [info     ] TD3PlusBC_20251206181445: epoch=87 step=87000 epoch=87 metrics={'time_sample_batch': 0.004662424325942993, 'time_algorithm_update': 0.011673057079315185, 'critic_loss': 3811.455212097168, 'actor_loss': -2.4543252930641173, 'bc_loss': 0.045313412986695764, 'time_step': 0.016574844598770143, 'td_error': 453.7788895077171, 'value_scale': 323.81626108685964, 'discounted_advantage': -435.96556050252326, 'initial_state': 255.9471893310547, 'diff_eval': 1704.9169045825304} step=87000
2025-12-06 18:44.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_87000.d3


Epoch 88/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.59it/s, critic_loss=4.11e+3, actor_loss=-2.45, bc_loss=0.0454]


2025-12-06 18:44.51 [info     ] TD3PlusBC_20251206181445: epoch=88 step=88000 epoch=88 metrics={'time_sample_batch': 0.004691588401794434, 'time_algorithm_update': 0.011817046642303467, 'critic_loss': 4108.230418212891, 'actor_loss': -2.4541771531105043, 'bc_loss': 0.04538182385265827, 'time_step': 0.016755020380020143, 'td_error': 476.0964303573809, 'value_scale': 334.9026218168094, 'discounted_advantage': -424.7171478785028, 'initial_state': 259.9998779296875, 'diff_eval': 1496.2924180199495} step=88000
2025-12-06 18:44.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_88000.d3


Epoch 89/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.51it/s, critic_loss=4.51e+3, actor_loss=-2.45, bc_loss=0.0452]


2025-12-06 18:45.12 [info     ] TD3PlusBC_20251206181445: epoch=89 step=89000 epoch=89 metrics={'time_sample_batch': 0.004804878711700439, 'time_algorithm_update': 0.011994758367538452, 'critic_loss': 4515.408201660156, 'actor_loss': -2.4544363479614257, 'bc_loss': 0.04521754291653633, 'time_step': 0.017060315132141114, 'td_error': 504.1737818935514, 'value_scale': 347.1684851762335, 'discounted_advantage': -471.3307554604527, 'initial_state': 271.41796875, 'diff_eval': 1453.747964397783} step=89000
2025-12-06 18:45.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_89000.d3


Epoch 90/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.06it/s, critic_loss=4.92e+3, actor_loss=-2.45, bc_loss=0.0455]


2025-12-06 18:45.32 [info     ] TD3PlusBC_20251206181445: epoch=90 step=90000 epoch=90 metrics={'time_sample_batch': 0.004624532222747803, 'time_algorithm_update': 0.01176951003074646, 'critic_loss': 4922.964237182618, 'actor_loss': -2.454125024795532, 'bc_loss': 0.045485117875039574, 'time_step': 0.016638443946838378, 'td_error': 598.2265952174758, 'value_scale': 365.6107862397391, 'discounted_advantage': -494.657147088137, 'initial_state': 288.3940124511719, 'diff_eval': 1855.7497412325292} step=90000
2025-12-06 18:45.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_90000.d3


Epoch 91/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.12it/s, critic_loss=5.43e+3, actor_loss=-2.45, bc_loss=0.0453]


2025-12-06 18:45.53 [info     ] TD3PlusBC_20251206181445: epoch=91 step=91000 epoch=91 metrics={'time_sample_batch': 0.004850705862045288, 'time_algorithm_update': 0.012094026565551757, 'critic_loss': 5436.420766235352, 'actor_loss': -2.454242539405823, 'bc_loss': 0.04533367795497179, 'time_step': 0.01719828200340271, 'td_error': 672.803806597011, 'value_scale': 380.2188066864653, 'discounted_advantage': -510.919525156048, 'initial_state': 296.81402587890625, 'diff_eval': 1972.474779434597} step=91000
2025-12-06 18:45.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_91000.d3


Epoch 92/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.80it/s, critic_loss=5.95e+3, actor_loss=-2.45, bc_loss=0.0456]


2025-12-06 18:46.14 [info     ] TD3PlusBC_20251206181445: epoch=92 step=92000 epoch=92 metrics={'time_sample_batch': 0.004650806427001953, 'time_algorithm_update': 0.012130758762359618, 'critic_loss': 5955.50178338623, 'actor_loss': -2.4539643273353575, 'bc_loss': 0.04557275532931089, 'time_step': 0.01702265501022339, 'td_error': 694.8411380386, 'value_scale': 396.46593334648855, 'discounted_advantage': -525.103758710517, 'initial_state': 311.01226806640625, 'diff_eval': 1820.6972371698846} step=92000
2025-12-06 18:46.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_92000.d3


Epoch 93/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.64it/s, critic_loss=6.45e+3, actor_loss=-2.45, bc_loss=0.0454]


2025-12-06 18:46.35 [info     ] TD3PlusBC_20251206181445: epoch=93 step=93000 epoch=93 metrics={'time_sample_batch': 0.004703208446502686, 'time_algorithm_update': 0.012105622053146362, 'critic_loss': 6448.674310180664, 'actor_loss': -2.454067113876343, 'bc_loss': 0.04545798159390688, 'time_step': 0.017053345680236816, 'td_error': 723.8587030799512, 'value_scale': 411.2307851236604, 'discounted_advantage': -554.7860325720565, 'initial_state': 317.7181396484375, 'diff_eval': 1557.1766278666764} step=93000
2025-12-06 18:46.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_93000.d3


Epoch 94/200: 100%|██████████| 1000/1000 [00:17<00:00, 56.39it/s, critic_loss=7.06e+3, actor_loss=-2.45, bc_loss=0.046]


2025-12-06 18:46.57 [info     ] TD3PlusBC_20251206181445: epoch=94 step=94000 epoch=94 metrics={'time_sample_batch': 0.004619446039199829, 'time_algorithm_update': 0.01253991436958313, 'critic_loss': 7056.721892333984, 'actor_loss': -2.453580228328705, 'bc_loss': 0.04597248776257038, 'time_step': 0.017409030199050903, 'td_error': 882.6603155889995, 'value_scale': 434.9032371693665, 'discounted_advantage': -554.9173620308488, 'initial_state': 342.2485656738281, 'diff_eval': 1798.5937495970297} step=94000
2025-12-06 18:46.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_94000.d3


Epoch 95/200: 100%|██████████| 1000/1000 [00:17<00:00, 55.87it/s, critic_loss=7.64e+3, actor_loss=-2.45, bc_loss=0.0463]


2025-12-06 18:47.18 [info     ] TD3PlusBC_20251206181445: epoch=95 step=95000 epoch=95 metrics={'time_sample_batch': 0.004668411254882813, 'time_algorithm_update': 0.012635341167449951, 'critic_loss': 7630.368420532227, 'actor_loss': -2.4532257604598997, 'bc_loss': 0.046269604369997976, 'time_step': 0.017571743965148927, 'td_error': 937.96739918104, 'value_scale': 450.23899744742073, 'discounted_advantage': -590.7860814372635, 'initial_state': 353.66912841796875, 'diff_eval': 1768.7081728217381} step=95000
2025-12-06 18:47.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_95000.d3


Epoch 96/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.45it/s, critic_loss=8.53e+3, actor_loss=-2.45, bc_loss=0.0448]


2025-12-06 18:47.38 [info     ] TD3PlusBC_20251206181445: epoch=96 step=96000 epoch=96 metrics={'time_sample_batch': 0.004585093259811402, 'time_algorithm_update': 0.011459991931915284, 'critic_loss': 8543.488628112793, 'actor_loss': -2.454789980888367, 'bc_loss': 0.044802195325493815, 'time_step': 0.016257419109344484, 'td_error': 1054.129934250118, 'value_scale': 466.13199573910146, 'discounted_advantage': -651.599397633703, 'initial_state': 368.25079345703125, 'diff_eval': 1761.4876641834737} step=96000
2025-12-06 18:47.38 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_96000.d3


Epoch 97/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.28it/s, critic_loss=9.53e+3, actor_loss=-2.45, bc_loss=0.0452]


2025-12-06 18:47.58 [info     ] TD3PlusBC_20251206181445: epoch=97 step=97000 epoch=97 metrics={'time_sample_batch': 0.004821555852890015, 'time_algorithm_update': 0.011790426969528199, 'critic_loss': 9512.138873046875, 'actor_loss': -2.454386309146881, 'bc_loss': 0.04517748118937016, 'time_step': 0.01685233426094055, 'td_error': 1036.0519033428543, 'value_scale': 488.55420411742244, 'discounted_advantage': -664.4848236208019, 'initial_state': 379.1123352050781, 'diff_eval': 1388.201191423523} step=97000
2025-12-06 18:47.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_97000.d3


Epoch 98/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.11it/s, critic_loss=1.04e+4, actor_loss=-2.45, bc_loss=0.0446]


2025-12-06 18:48.19 [info     ] TD3PlusBC_20251206181445: epoch=98 step=98000 epoch=98 metrics={'time_sample_batch': 0.004703844547271728, 'time_algorithm_update': 0.0119322669506073, 'critic_loss': 10425.491142578125, 'actor_loss': -2.4548589024543763, 'bc_loss': 0.04464495133608579, 'time_step': 0.01689037609100342, 'td_error': 1383.9626337763855, 'value_scale': 513.9690743995532, 'discounted_advantage': -721.5194359207801, 'initial_state': 394.7364196777344, 'diff_eval': 1925.4268106514157} step=98000
2025-12-06 18:48.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_98000.d3


Epoch 99/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.19it/s, critic_loss=1.16e+4, actor_loss=-2.45, bc_loss=0.0456]


2025-12-06 18:48.40 [info     ] TD3PlusBC_20251206181445: epoch=99 step=99000 epoch=99 metrics={'time_sample_batch': 0.004665359258651734, 'time_algorithm_update': 0.012004516363143921, 'critic_loss': 11621.761025390624, 'actor_loss': -2.453925225734711, 'bc_loss': 0.04557890186458826, 'time_step': 0.016899995326995848, 'td_error': 1517.3563269948386, 'value_scale': 537.1749260799284, 'discounted_advantage': -744.0828489626323, 'initial_state': 411.2930908203125, 'diff_eval': 1900.383877153034} step=99000
2025-12-06 18:48.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_99000.d3


Epoch 100/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.29it/s, critic_loss=1.27e+4, actor_loss=-2.45, bc_loss=0.0451]


2025-12-06 18:49.00 [info     ] TD3PlusBC_20251206181445: epoch=100 step=100000 epoch=100 metrics={'time_sample_batch': 0.004443780660629272, 'time_algorithm_update': 0.012196152687072755, 'critic_loss': 12693.028298583984, 'actor_loss': -2.4543001608848574, 'bc_loss': 0.04516532972455025, 'time_step': 0.016879055738449097, 'td_error': 1404.3792075840913, 'value_scale': 557.4080121535168, 'discounted_advantage': -739.9772203817771, 'initial_state': 424.8872985839844, 'diff_eval': 1432.2333802628689} step=100000
2025-12-06 18:49.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_100000.d3


Epoch 101/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.46it/s, critic_loss=1.4e+4, actor_loss=-2.45, bc_loss=0.0456]


2025-12-06 18:49.20 [info     ] TD3PlusBC_20251206181445: epoch=101 step=101000 epoch=101 metrics={'time_sample_batch': 0.004638959884643554, 'time_algorithm_update': 0.011635430812835693, 'critic_loss': 13935.716469970703, 'actor_loss': -2.453966595649719, 'bc_loss': 0.04558163097500801, 'time_step': 0.01652162742614746, 'td_error': 1744.8670768479492, 'value_scale': 593.9668879672847, 'discounted_advantage': -826.427392749231, 'initial_state': 459.07928466796875, 'diff_eval': 1625.2488624640528} step=101000
2025-12-06 18:49.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_101000.d3


Epoch 102/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.09it/s, critic_loss=1.53e+4, actor_loss=-2.45, bc_loss=0.0455]


2025-12-06 18:49.41 [info     ] TD3PlusBC_20251206181445: epoch=102 step=102000 epoch=102 metrics={'time_sample_batch': 0.004684648513793945, 'time_algorithm_update': 0.011681257486343384, 'critic_loss': 15366.62640258789, 'actor_loss': -2.454082179546356, 'bc_loss': 0.04551922843977809, 'time_step': 0.01661607003211975, 'td_error': 1624.2871098122719, 'value_scale': 614.1336877635819, 'discounted_advantage': -808.2990773564317, 'initial_state': 475.869873046875, 'diff_eval': 1495.107466840438} step=102000
2025-12-06 18:49.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_102000.d3


Epoch 103/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.10it/s, critic_loss=1.74e+4, actor_loss=-2.46, bc_loss=0.0443]


2025-12-06 18:50.02 [info     ] TD3PlusBC_20251206181445: epoch=103 step=103000 epoch=103 metrics={'time_sample_batch': 0.004887041568756104, 'time_algorithm_update': 0.012056021213531495, 'critic_loss': 17376.283462402345, 'actor_loss': -2.4552637419700623, 'bc_loss': 0.04425727776437998, 'time_step': 0.017193554639816283, 'td_error': 1838.909469499592, 'value_scale': 638.6589849812586, 'discounted_advantage': -856.9839417911057, 'initial_state': 492.406494140625, 'diff_eval': 1550.2472619567698} step=103000
2025-12-06 18:50.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_103000.d3


Epoch 104/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.32it/s, critic_loss=1.89e+4, actor_loss=-2.45, bc_loss=0.0446]


2025-12-06 18:50.22 [info     ] TD3PlusBC_20251206181445: epoch=104 step=104000 epoch=104 metrics={'time_sample_batch': 0.004698784351348877, 'time_algorithm_update': 0.011880344867706298, 'critic_loss': 18865.241825195313, 'actor_loss': -2.454870916366577, 'bc_loss': 0.04463189757615328, 'time_step': 0.0168391432762146, 'td_error': 2170.8818207748204, 'value_scale': 677.9878703370778, 'discounted_advantage': -913.1191903402918, 'initial_state': 529.18896484375, 'diff_eval': 1674.0270730367993} step=104000
2025-12-06 18:50.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_104000.d3


Epoch 105/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.35it/s, critic_loss=2.11e+4, actor_loss=-2.45, bc_loss=0.0445]


2025-12-06 18:50.43 [info     ] TD3PlusBC_20251206181445: epoch=105 step=105000 epoch=105 metrics={'time_sample_batch': 0.0047063839435577395, 'time_algorithm_update': 0.011877803087234497, 'critic_loss': 21029.78568310547, 'actor_loss': -2.454945563316345, 'bc_loss': 0.04453335357457399, 'time_step': 0.016839415788650513, 'td_error': 2376.4625362259235, 'value_scale': 704.5710132659631, 'discounted_advantage': -942.1967335524173, 'initial_state': 547.3877563476562, 'diff_eval': 1843.2659419153806} step=105000
2025-12-06 18:50.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_105000.d3


Epoch 106/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.59it/s, critic_loss=2.24e+4, actor_loss=-2.46, bc_loss=0.0443]


2025-12-06 18:51.03 [info     ] TD3PlusBC_20251206181445: epoch=106 step=106000 epoch=106 metrics={'time_sample_batch': 0.004560935258865357, 'time_algorithm_update': 0.011695467233657837, 'critic_loss': 22388.295442382812, 'actor_loss': -2.4553085165023805, 'bc_loss': 0.04426689637452364, 'time_step': 0.016490782976150514, 'td_error': 2439.2988191095096, 'value_scale': 739.2143892478463, 'discounted_advantage': -1016.4301391463231, 'initial_state': 582.5608520507812, 'diff_eval': 1614.6783173024169} step=106000
2025-12-06 18:51.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_106000.d3


Epoch 107/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.62it/s, critic_loss=2.46e+4, actor_loss=-2.46, bc_loss=0.0439]


2025-12-06 18:51.24 [info     ] TD3PlusBC_20251206181445: epoch=107 step=107000 epoch=107 metrics={'time_sample_batch': 0.004733164310455323, 'time_algorithm_update': 0.012004227638244628, 'critic_loss': 24626.408455078126, 'actor_loss': -2.4556197485923765, 'bc_loss': 0.04394084889441729, 'time_step': 0.017004598140716552, 'td_error': 2905.621896721843, 'value_scale': 766.5206972619415, 'discounted_advantage': -1086.5399456122746, 'initial_state': 596.5322875976562, 'diff_eval': 1651.4823433877502} step=107000
2025-12-06 18:51.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_107000.d3


Epoch 108/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.74it/s, critic_loss=2.7e+4, actor_loss=-2.45, bc_loss=0.0445]


2025-12-06 18:51.44 [info     ] TD3PlusBC_20251206181445: epoch=108 step=108000 epoch=108 metrics={'time_sample_batch': 0.004661800146102906, 'time_algorithm_update': 0.011782273530960083, 'critic_loss': 27042.683008789063, 'actor_loss': -2.454963737487793, 'bc_loss': 0.04449823839962482, 'time_step': 0.01670212149620056, 'td_error': 2829.1070106932216, 'value_scale': 799.7889307474551, 'discounted_advantage': -1083.8295415549064, 'initial_state': 624.1799926757812, 'diff_eval': 1695.178216495169} step=108000
2025-12-06 18:51.44 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_108000.d3


Epoch 109/200: 100%|██████████| 1000/1000 [00:17<00:00, 55.69it/s, critic_loss=2.93e+4, actor_loss=-2.45, bc_loss=0.0446]


2025-12-06 18:52.06 [info     ] TD3PlusBC_20251206181445: epoch=109 step=109000 epoch=109 metrics={'time_sample_batch': 0.004985879421234131, 'time_algorithm_update': 0.012347050189971924, 'critic_loss': 29400.30823046875, 'actor_loss': -2.4548928565979002, 'bc_loss': 0.044624519273638724, 'time_step': 0.017597813367843627, 'td_error': 3753.7321233623, 'value_scale': 827.3274886714044, 'discounted_advantage': -1198.5132261330862, 'initial_state': 645.8507080078125, 'diff_eval': 2014.1931677866878} step=109000
2025-12-06 18:52.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_109000.d3


Epoch 110/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.72it/s, critic_loss=3.19e+4, actor_loss=-2.46, bc_loss=0.0438]


2025-12-06 18:52.26 [info     ] TD3PlusBC_20251206181445: epoch=110 step=110000 epoch=110 metrics={'time_sample_batch': 0.004806864976882935, 'time_algorithm_update': 0.011945885181427002, 'critic_loss': 32018.38905810547, 'actor_loss': -2.455776786327362, 'bc_loss': 0.0437910863161087, 'time_step': 0.01700379180908203, 'td_error': 3432.610847024772, 'value_scale': 858.79231643357, 'discounted_advantage': -1208.6981737860733, 'initial_state': 674.6242065429688, 'diff_eval': 1556.9669404399156} step=110000
2025-12-06 18:52.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_110000.d3


Epoch 111/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.82it/s, critic_loss=3.52e+4, actor_loss=-2.46, bc_loss=0.0434]


2025-12-06 18:52.46 [info     ] TD3PlusBC_20251206181445: epoch=111 step=111000 epoch=111 metrics={'time_sample_batch': 0.004469735383987427, 'time_algorithm_update': 0.011403478622436524, 'critic_loss': 35174.16263916015, 'actor_loss': -2.456117018222809, 'bc_loss': 0.043469097770750524, 'time_step': 0.01613416528701782, 'td_error': 3317.0611339643633, 'value_scale': 902.2032878712656, 'discounted_advantage': -1198.7002792015965, 'initial_state': 714.310302734375, 'diff_eval': 1543.6880875399909} step=111000
2025-12-06 18:52.46 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_111000.d3


Epoch 112/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.00it/s, critic_loss=3.86e+4, actor_loss=-2.46, bc_loss=0.0438]


2025-12-06 18:53.06 [info     ] TD3PlusBC_20251206181445: epoch=112 step=112000 epoch=112 metrics={'time_sample_batch': 0.004687171697616577, 'time_algorithm_update': 0.011739513397216798, 'critic_loss': 38603.783187011715, 'actor_loss': -2.4558763422966003, 'bc_loss': 0.043770176228135824, 'time_step': 0.01666338849067688, 'td_error': 3542.450299769832, 'value_scale': 940.4011334581527, 'discounted_advantage': -1253.735857779727, 'initial_state': 751.1519775390625, 'diff_eval': 1531.464758481615} step=112000
2025-12-06 18:53.07 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_112000.d3


Epoch 113/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.06it/s, critic_loss=4.14e+4, actor_loss=-2.46, bc_loss=0.0438]


2025-12-06 18:53.27 [info     ] TD3PlusBC_20251206181445: epoch=113 step=113000 epoch=113 metrics={'time_sample_batch': 0.004680066108703613, 'time_algorithm_update': 0.011681248426437378, 'critic_loss': 41403.2214609375, 'actor_loss': -2.4558153772354125, 'bc_loss': 0.043839973613619805, 'time_step': 0.016621726989746094, 'td_error': 3373.609408013073, 'value_scale': 964.0347418709154, 'discounted_advantage': -1261.857974366396, 'initial_state': 775.330322265625, 'diff_eval': 1393.5807046244106} step=113000
2025-12-06 18:53.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_113000.d3


Epoch 114/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.32it/s, critic_loss=4.43e+4, actor_loss=-2.46, bc_loss=0.0445]


2025-12-06 18:53.47 [info     ] TD3PlusBC_20251206181445: epoch=114 step=114000 epoch=114 metrics={'time_sample_batch': 0.00473170804977417, 'time_algorithm_update': 0.01187595820426941, 'critic_loss': 44426.5841015625, 'actor_loss': -2.4551202635765077, 'bc_loss': 0.0445124126970768, 'time_step': 0.016842772722244263, 'td_error': 4012.483722160423, 'value_scale': 998.023649025443, 'discounted_advantage': -1317.5143122755226, 'initial_state': 793.7734375, 'diff_eval': 1708.7620758249448} step=114000
2025-12-06 18:53.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_114000.d3


Epoch 115/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.64it/s, critic_loss=4.85e+4, actor_loss=-2.46, bc_loss=0.0444]


2025-12-06 18:54.08 [info     ] TD3PlusBC_20251206181445: epoch=115 step=115000 epoch=115 metrics={'time_sample_batch': 0.004787810087203979, 'time_algorithm_update': 0.012004705667495728, 'critic_loss': 48468.170515625, 'actor_loss': -2.455286521911621, 'bc_loss': 0.04438629235327244, 'time_step': 0.017038444757461548, 'td_error': 4275.6040293431315, 'value_scale': 1032.2104816292876, 'discounted_advantage': -1371.6430970977005, 'initial_state': 834.1759643554688, 'diff_eval': 1972.2388200351452} step=115000
2025-12-06 18:54.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_115000.d3


Epoch 116/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.77it/s, critic_loss=5.19e+4, actor_loss=-2.46, bc_loss=0.044]


2025-12-06 18:54.28 [info     ] TD3PlusBC_20251206181445: epoch=116 step=116000 epoch=116 metrics={'time_sample_batch': 0.004584509134292603, 'time_algorithm_update': 0.011629252433776856, 'critic_loss': 51815.28300292969, 'actor_loss': -2.45543284034729, 'bc_loss': 0.043980206601321695, 'time_step': 0.016446423053741454, 'td_error': 3863.2068372717686, 'value_scale': 1069.7004484574734, 'discounted_advantage': -1392.4664893710762, 'initial_state': 858.6720581054688, 'diff_eval': 1378.9547208019194} step=116000
2025-12-06 18:54.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_116000.d3


Epoch 117/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.12it/s, critic_loss=5.53e+4, actor_loss=-2.45, bc_loss=0.0443]


2025-12-06 18:54.49 [info     ] TD3PlusBC_20251206181445: epoch=117 step=117000 epoch=117 metrics={'time_sample_batch': 0.004746616363525391, 'time_algorithm_update': 0.011907338857650758, 'critic_loss': 55355.44204785156, 'actor_loss': -2.45499205827713, 'bc_loss': 0.04423677768558264, 'time_step': 0.016895005464553834, 'td_error': 4565.129498087043, 'value_scale': 1108.6359025262927, 'discounted_advantage': -1443.8484717081194, 'initial_state': 898.3001708984375, 'diff_eval': 1715.243789159755} step=117000
2025-12-06 18:54.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_117000.d3


Epoch 118/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.79it/s, critic_loss=5.93e+4, actor_loss=-2.45, bc_loss=0.0448]


2025-12-06 18:55.10 [info     ] TD3PlusBC_20251206181445: epoch=118 step=118000 epoch=118 metrics={'time_sample_batch': 0.004762064933776855, 'time_algorithm_update': 0.01191449236869812, 'critic_loss': 59273.80850976562, 'actor_loss': -2.45467076253891, 'bc_loss': 0.044806382954120635, 'time_step': 0.016957350730895996, 'td_error': 5020.643138767014, 'value_scale': 1149.6572376388717, 'discounted_advantage': -1569.6314276090136, 'initial_state': 924.333251953125, 'diff_eval': 1703.548463383257} step=118000
2025-12-06 18:55.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_118000.d3


Epoch 119/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.00it/s, critic_loss=6.42e+4, actor_loss=-2.46, bc_loss=0.0435]


2025-12-06 18:55.30 [info     ] TD3PlusBC_20251206181445: epoch=119 step=119000 epoch=119 metrics={'time_sample_batch': 0.0047767508029937745, 'time_algorithm_update': 0.011901386499404908, 'critic_loss': 64293.53155175781, 'actor_loss': -2.4556741771698, 'bc_loss': 0.04352885565906763, 'time_step': 0.016929104089736937, 'td_error': 5129.55183697736, 'value_scale': 1180.203429306043, 'discounted_advantage': -1601.0221092476816, 'initial_state': 954.8164672851562, 'diff_eval': 1579.2116776288356} step=119000
2025-12-06 18:55.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_119000.d3


Epoch 120/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.54it/s, critic_loss=6.93e+4, actor_loss=-2.45, bc_loss=0.0448]


2025-12-06 18:55.51 [info     ] TD3PlusBC_20251206181445: epoch=120 step=120000 epoch=120 metrics={'time_sample_batch': 0.0048604412078857425, 'time_algorithm_update': 0.011955827474594116, 'critic_loss': 69415.16856640625, 'actor_loss': -2.454290843963623, 'bc_loss': 0.044750350788235665, 'time_step': 0.017062361478805542, 'td_error': 5472.784154590929, 'value_scale': 1218.4683271025972, 'discounted_advantage': -1614.3783948610956, 'initial_state': 981.5997924804688, 'diff_eval': 1773.7177122536511} step=120000
2025-12-06 18:55.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_120000.d3


Epoch 121/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.71it/s, critic_loss=7.38e+4, actor_loss=-2.45, bc_loss=0.0445]


2025-12-06 18:56.11 [info     ] TD3PlusBC_20251206181445: epoch=121 step=121000 epoch=121 metrics={'time_sample_batch': 0.004623006343841553, 'time_algorithm_update': 0.01158165431022644, 'critic_loss': 73835.121203125, 'actor_loss': -2.4546546335220336, 'bc_loss': 0.04447998530417681, 'time_step': 0.016452747106552125, 'td_error': 5967.609406730755, 'value_scale': 1271.4394192052107, 'discounted_advantage': -1689.951534451108, 'initial_state': 1030.361572265625, 'diff_eval': 1680.6383121770089} step=121000
2025-12-06 18:56.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_121000.d3


Epoch 122/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.74it/s, critic_loss=8.15e+4, actor_loss=-2.45, bc_loss=0.0449]


2025-12-06 18:56.32 [info     ] TD3PlusBC_20251206181445: epoch=122 step=122000 epoch=122 metrics={'time_sample_batch': 0.004721903800964355, 'time_algorithm_update': 0.012008652448654174, 'critic_loss': 81468.23934765624, 'actor_loss': -2.4541390557289122, 'bc_loss': 0.04489043474942446, 'time_step': 0.016997331380844118, 'td_error': 6616.2575122367525, 'value_scale': 1306.1429373268709, 'discounted_advantage': -1795.5378943940016, 'initial_state': 1051.6314697265625, 'diff_eval': 1676.3570447025777} step=122000
2025-12-06 18:56.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_122000.d3


Epoch 123/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.53it/s, critic_loss=8.61e+4, actor_loss=-2.46, bc_loss=0.0437]


2025-12-06 18:56.53 [info     ] TD3PlusBC_20251206181445: epoch=123 step=123000 epoch=123 metrics={'time_sample_batch': 0.004626698017120361, 'time_algorithm_update': 0.012178433418273927, 'critic_loss': 86171.94511914063, 'actor_loss': -2.455379300117493, 'bc_loss': 0.04370012337714434, 'time_step': 0.0170792338848114, 'td_error': 6683.706834572334, 'value_scale': 1350.3392934615333, 'discounted_advantage': -1830.0664566754695, 'initial_state': 1089.9061279296875, 'diff_eval': 1460.7895174307296} step=123000
2025-12-06 18:56.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_123000.d3


Epoch 124/200: 100%|██████████| 1000/1000 [00:17<00:00, 56.07it/s, critic_loss=9.55e+4, actor_loss=-2.45, bc_loss=0.0441]


2025-12-06 18:57.14 [info     ] TD3PlusBC_20251206181445: epoch=124 step=124000 epoch=124 metrics={'time_sample_batch': 0.00495920205116272, 'time_algorithm_update': 0.012274229049682617, 'critic_loss': 95428.5936015625, 'actor_loss': -2.4547757568359376, 'bc_loss': 0.0440428681448102, 'time_step': 0.017494553565979003, 'td_error': 7141.197343155887, 'value_scale': 1395.2701216743258, 'discounted_advantage': -1913.5063055505095, 'initial_state': 1122.48583984375, 'diff_eval': 1594.5656561674343} step=124000
2025-12-06 18:57.15 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_124000.d3


Epoch 125/200: 100%|██████████| 1000/1000 [00:18<00:00, 54.14it/s, critic_loss=1.03e+5, actor_loss=-2.45, bc_loss=0.0445]


2025-12-06 18:57.37 [info     ] TD3PlusBC_20251206181445: epoch=125 step=125000 epoch=125 metrics={'time_sample_batch': 0.004905864238739014, 'time_algorithm_update': 0.01296852159500122, 'critic_loss': 102693.34724804688, 'actor_loss': -2.454437620162964, 'bc_loss': 0.04453878727555275, 'time_step': 0.018138168573379517, 'td_error': 7616.01387374722, 'value_scale': 1458.2120152079349, 'discounted_advantage': -1903.3781353480274, 'initial_state': 1170.1153564453125, 'diff_eval': 1436.584598956746} step=125000
2025-12-06 18:57.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_125000.d3


Epoch 126/200: 100%|██████████| 1000/1000 [00:17<00:00, 56.54it/s, critic_loss=1.1e+5, actor_loss=-2.45, bc_loss=0.0448]


2025-12-06 18:57.58 [info     ] TD3PlusBC_20251206181445: epoch=126 step=126000 epoch=126 metrics={'time_sample_batch': 0.0048390858173370365, 'time_algorithm_update': 0.012256437301635743, 'critic_loss': 109949.07743554687, 'actor_loss': -2.4541622714996336, 'bc_loss': 0.0447997869476676, 'time_step': 0.0173583881855011, 'td_error': 7764.537114490264, 'value_scale': 1502.195946590299, 'discounted_advantage': -1974.5126953420245, 'initial_state': 1202.862548828125, 'diff_eval': 1281.663626119605} step=126000
2025-12-06 18:57.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_126000.d3


Epoch 127/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.34it/s, critic_loss=1.22e+5, actor_loss=-2.46, bc_loss=0.0435]


2025-12-06 18:58.18 [info     ] TD3PlusBC_20251206181445: epoch=127 step=127000 epoch=127 metrics={'time_sample_batch': 0.00471774697303772, 'time_algorithm_update': 0.011607767343521118, 'critic_loss': 121734.7836796875, 'actor_loss': -2.4553427233695984, 'bc_loss': 0.043544565051794056, 'time_step': 0.016564550638198852, 'td_error': 9114.466858624572, 'value_scale': 1562.150027688371, 'discounted_advantage': -2136.1741196632415, 'initial_state': 1254.7877197265625, 'diff_eval': 1388.22118308885} step=127000
2025-12-06 18:58.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_127000.d3


Epoch 128/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.18it/s, critic_loss=1.33e+5, actor_loss=-2.46, bc_loss=0.0437]


2025-12-06 18:58.38 [info     ] TD3PlusBC_20251206181445: epoch=128 step=128000 epoch=128 metrics={'time_sample_batch': 0.004494756460189819, 'time_algorithm_update': 0.011594692945480347, 'critic_loss': 132965.42398242187, 'actor_loss': -2.4553406991958617, 'bc_loss': 0.04374077350646258, 'time_step': 0.016328328609466553, 'td_error': 10714.429741210492, 'value_scale': 1609.3628344347972, 'discounted_advantage': -2262.8892075597814, 'initial_state': 1284.8988037109375, 'diff_eval': 1523.636754031784} step=128000
2025-12-06 18:58.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_128000.d3


Epoch 129/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.72it/s, critic_loss=1.42e+5, actor_loss=-2.45, bc_loss=0.0446]


2025-12-06 18:58.59 [info     ] TD3PlusBC_20251206181445: epoch=129 step=129000 epoch=129 metrics={'time_sample_batch': 0.004686605930328369, 'time_algorithm_update': 0.011957214832305909, 'critic_loss': 141837.8148769531, 'actor_loss': -2.4545187802314756, 'bc_loss': 0.04455954848974943, 'time_step': 0.016946383714675904, 'td_error': 11804.747788160466, 'value_scale': 1693.7796484027106, 'discounted_advantage': -2310.65180873847, 'initial_state': 1349.35595703125, 'diff_eval': 1863.0110167599855} step=129000
2025-12-06 18:58.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_129000.d3


Epoch 130/200: 100%|██████████| 1000/1000 [00:16<00:00, 58.94it/s, critic_loss=1.54e+5, actor_loss=-2.45, bc_loss=0.0445]


2025-12-06 18:59.20 [info     ] TD3PlusBC_20251206181445: epoch=130 step=130000 epoch=130 metrics={'time_sample_batch': 0.004635066032409668, 'time_algorithm_update': 0.011730806827545166, 'critic_loss': 154120.70812109375, 'actor_loss': -2.4545177073478697, 'bc_loss': 0.04449929356575012, 'time_step': 0.01663806676864624, 'td_error': 11729.599712385296, 'value_scale': 1760.943724460394, 'discounted_advantage': -2408.2883630993724, 'initial_state': 1404.5987548828125, 'diff_eval': 1372.4667742020426} step=130000
2025-12-06 18:59.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_130000.d3


Epoch 131/200: 100%|██████████| 1000/1000 [00:17<00:00, 56.98it/s, critic_loss=1.67e+5, actor_loss=-2.46, bc_loss=0.0438]


2025-12-06 18:59.41 [info     ] TD3PlusBC_20251206181445: epoch=131 step=131000 epoch=131 metrics={'time_sample_batch': 0.004747197866439819, 'time_algorithm_update': 0.012241283655166626, 'critic_loss': 166710.37862890624, 'actor_loss': -2.455158490180969, 'bc_loss': 0.04383344725519419, 'time_step': 0.01723260498046875, 'td_error': 13337.933198978391, 'value_scale': 1839.0851678840272, 'discounted_advantage': -2474.78581623177, 'initial_state': 1457.3221435546875, 'diff_eval': 1615.4110013994289} step=131000
2025-12-06 18:59.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_131000.d3


Epoch 132/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.24it/s, critic_loss=1.84e+5, actor_loss=-2.45, bc_loss=0.0442]


2025-12-06 19:00.01 [info     ] TD3PlusBC_20251206181445: epoch=132 step=132000 epoch=132 metrics={'time_sample_batch': 0.004568844079971313, 'time_algorithm_update': 0.011722002029418945, 'critic_loss': 183885.00443359374, 'actor_loss': -2.454892590999603, 'bc_loss': 0.04420922900736332, 'time_step': 0.016555408000946046, 'td_error': 12834.451326963683, 'value_scale': 1921.8970282683329, 'discounted_advantage': -2511.5593814429776, 'initial_state': 1530.942138671875, 'diff_eval': 1336.350306049982} step=132000
2025-12-06 19:00.01 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_132000.d3


Epoch 133/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.76it/s, critic_loss=2.01e+5, actor_loss=-2.45, bc_loss=0.0445]


2025-12-06 19:00.22 [info     ] TD3PlusBC_20251206181445: epoch=133 step=133000 epoch=133 metrics={'time_sample_batch': 0.004671878814697265, 'time_algorithm_update': 0.011768701314926147, 'critic_loss': 201145.1248515625, 'actor_loss': -2.4547185263633726, 'bc_loss': 0.044499689504504204, 'time_step': 0.016710487127304077, 'td_error': 15993.865198538706, 'value_scale': 2008.181147278804, 'discounted_advantage': -2691.628721485361, 'initial_state': 1588.4783935546875, 'diff_eval': 1668.200252326255} step=133000
2025-12-06 19:00.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_133000.d3


Epoch 134/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.13it/s, critic_loss=2.16e+5, actor_loss=-2.45, bc_loss=0.0443]


2025-12-06 19:00.42 [info     ] TD3PlusBC_20251206181445: epoch=134 step=134000 epoch=134 metrics={'time_sample_batch': 0.004743939161300659, 'time_algorithm_update': 0.011856804609298706, 'critic_loss': 216065.95020703124, 'actor_loss': -2.4548467531204223, 'bc_loss': 0.04429078608751297, 'time_step': 0.01686179256439209, 'td_error': 18441.19380659602, 'value_scale': 2064.6823115512693, 'discounted_advantage': -2933.225590324371, 'initial_state': 1630.9925537109375, 'diff_eval': 1503.5346069417687} step=134000
2025-12-06 19:00.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_134000.d3


Epoch 135/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.28it/s, critic_loss=2.34e+5, actor_loss=-2.45, bc_loss=0.0443]


2025-12-06 19:01.03 [info     ] TD3PlusBC_20251206181445: epoch=135 step=135000 epoch=135 metrics={'time_sample_batch': 0.004692468881607056, 'time_algorithm_update': 0.011636056900024415, 'critic_loss': 234278.8922890625, 'actor_loss': -2.454897305011749, 'bc_loss': 0.04430885074287653, 'time_step': 0.016565964221954346, 'td_error': 18502.30351982475, 'value_scale': 2188.5515586424553, 'discounted_advantage': -2964.554205426915, 'initial_state': 1744.102783203125, 'diff_eval': 1450.4752089407339} step=135000
2025-12-06 19:01.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_135000.d3


Epoch 136/200: 100%|██████████| 1000/1000 [00:16<00:00, 58.93it/s, critic_loss=2.63e+5, actor_loss=-2.45, bc_loss=0.0443]


2025-12-06 19:01.23 [info     ] TD3PlusBC_20251206181445: epoch=136 step=136000 epoch=136 metrics={'time_sample_batch': 0.004690869092941284, 'time_algorithm_update': 0.011745050430297851, 'critic_loss': 262690.2324335938, 'actor_loss': -2.4547515397071837, 'bc_loss': 0.04429364959895611, 'time_step': 0.01666576671600342, 'td_error': 23506.127913655244, 'value_scale': 2275.4653103389564, 'discounted_advantage': -3241.329421983852, 'initial_state': 1795.8577880859375, 'diff_eval': 1825.6848640627918} step=136000
2025-12-06 19:01.23 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_136000.d3


Epoch 137/200: 100%|██████████| 1000/1000 [00:17<00:00, 56.19it/s, critic_loss=2.83e+5, actor_loss=-2.45, bc_loss=0.0444]


2025-12-06 19:01.44 [info     ] TD3PlusBC_20251206181445: epoch=137 step=137000 epoch=137 metrics={'time_sample_batch': 0.004932419776916504, 'time_algorithm_update': 0.01226167368888855, 'critic_loss': 283078.8675390625, 'actor_loss': -2.4548558015823363, 'bc_loss': 0.04442856787145138, 'time_step': 0.0174549605846405, 'td_error': 22099.77915104057, 'value_scale': 2367.8648360349107, 'discounted_advantage': -3113.9528493363564, 'initial_state': 1888.9993896484375, 'diff_eval': 1495.9656764705676} step=137000
2025-12-06 19:01.45 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_137000.d3


Epoch 138/200: 100%|██████████| 1000/1000 [00:18<00:00, 55.52it/s, critic_loss=3.06e+5, actor_loss=-2.46, bc_loss=0.0442]


2025-12-06 19:02.06 [info     ] TD3PlusBC_20251206181445: epoch=138 step=138000 epoch=138 metrics={'time_sample_batch': 0.005052196979522705, 'time_algorithm_update': 0.012348487377166747, 'critic_loss': 305748.6653828125, 'actor_loss': -2.4552464122772215, 'bc_loss': 0.04418431094288826, 'time_step': 0.017670018434524537, 'td_error': 29504.31564945387, 'value_scale': 2489.57962141013, 'discounted_advantage': -3564.4470644941184, 'initial_state': 1971.289794921875, 'diff_eval': 2023.3989967045811} step=138000
2025-12-06 19:02.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_138000.d3


Epoch 139/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.62it/s, critic_loss=3.35e+5, actor_loss=-2.46, bc_loss=0.0444]


2025-12-06 19:02.26 [info     ] TD3PlusBC_20251206181445: epoch=139 step=139000 epoch=139 metrics={'time_sample_batch': 0.004714015960693359, 'time_algorithm_update': 0.01175492262840271, 'critic_loss': 335426.573390625, 'actor_loss': -2.455094367980957, 'bc_loss': 0.04445615182071924, 'time_step': 0.01672955870628357, 'td_error': 28886.208755538766, 'value_scale': 2575.643570854398, 'discounted_advantage': -3630.669252807171, 'initial_state': 2049.6259765625, 'diff_eval': 1587.382192873464} step=139000
2025-12-06 19:02.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_139000.d3


Epoch 140/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.89it/s, critic_loss=3.6e+5, actor_loss=-2.45, bc_loss=0.0449]


2025-12-06 19:02.47 [info     ] TD3PlusBC_20251206181445: epoch=140 step=140000 epoch=140 metrics={'time_sample_batch': 0.004828086853027344, 'time_algorithm_update': 0.011905062675476074, 'critic_loss': 360183.53778125, 'actor_loss': -2.454682336807251, 'bc_loss': 0.04490178399533033, 'time_step': 0.01697487449645996, 'td_error': 31770.935807987535, 'value_scale': 2703.9779814522703, 'discounted_advantage': -3711.726681945808, 'initial_state': 2155.025634765625, 'diff_eval': 1740.8572212605368} step=140000
2025-12-06 19:02.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_140000.d3


Epoch 141/200: 100%|██████████| 1000/1000 [00:18<00:00, 55.40it/s, critic_loss=3.94e+5, actor_loss=-2.45, bc_loss=0.0448]


2025-12-06 19:03.08 [info     ] TD3PlusBC_20251206181445: epoch=141 step=141000 epoch=141 metrics={'time_sample_batch': 0.005052478551864624, 'time_algorithm_update': 0.012405507564544678, 'critic_loss': 394819.6111953125, 'actor_loss': -2.4547384791374207, 'bc_loss': 0.04484301979094744, 'time_step': 0.01771107292175293, 'td_error': 37647.01643701963, 'value_scale': 2796.4504012358057, 'discounted_advantage': -4026.6709071166933, 'initial_state': 2224.52587890625, 'diff_eval': 1709.7442907652764} step=141000
2025-12-06 19:03.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_141000.d3


Epoch 142/200: 100%|██████████| 1000/1000 [00:18<00:00, 54.68it/s, critic_loss=4.23e+5, actor_loss=-2.46, bc_loss=0.0438]


2025-12-06 19:03.30 [info     ] TD3PlusBC_20251206181445: epoch=142 step=142000 epoch=142 metrics={'time_sample_batch': 0.005116213083267212, 'time_algorithm_update': 0.012543219089508057, 'critic_loss': 423247.8310976563, 'actor_loss': -2.455830547809601, 'bc_loss': 0.043771910212934015, 'time_step': 0.01793222141265869, 'td_error': 41776.0070439736, 'value_scale': 2918.387072272992, 'discounted_advantage': -4239.340019372407, 'initial_state': 2326.464599609375, 'diff_eval': 1881.5540062380644} step=142000
2025-12-06 19:03.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_142000.d3


Epoch 143/200: 100%|██████████| 1000/1000 [00:18<00:00, 54.29it/s, critic_loss=4.6e+5, actor_loss=-2.46, bc_loss=0.0446]


2025-12-06 19:03.52 [info     ] TD3PlusBC_20251206181445: epoch=143 step=143000 epoch=143 metrics={'time_sample_batch': 0.00513029932975769, 'time_algorithm_update': 0.012645454883575439, 'critic_loss': 460319.640703125, 'actor_loss': -2.4551285667419434, 'bc_loss': 0.04457977556437254, 'time_step': 0.018064758777618407, 'td_error': 36797.055021639215, 'value_scale': 3027.8675711057012, 'discounted_advantage': -3958.6495385253093, 'initial_state': 2435.8310546875, 'diff_eval': 1566.5983743560223} step=143000
2025-12-06 19:03.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_143000.d3


Epoch 144/200: 100%|██████████| 1000/1000 [00:20<00:00, 48.96it/s, critic_loss=4.97e+5, actor_loss=-2.46, bc_loss=0.0443]


2025-12-06 19:04.16 [info     ] TD3PlusBC_20251206181445: epoch=144 step=144000 epoch=144 metrics={'time_sample_batch': 0.005765649557113647, 'time_algorithm_update': 0.01391229820251465, 'critic_loss': 497091.6681171875, 'actor_loss': -2.455372355937958, 'bc_loss': 0.0443197937682271, 'time_step': 0.01999947118759155, 'td_error': 42113.10664426081, 'value_scale': 3171.6743652139107, 'discounted_advantage': -4259.141020742947, 'initial_state': 2543.854736328125, 'diff_eval': 1643.6868984231105} step=144000
2025-12-06 19:04.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_144000.d3


Epoch 145/200: 100%|██████████| 1000/1000 [00:19<00:00, 51.33it/s, critic_loss=5.47e+5, actor_loss=-2.46, bc_loss=0.0441]


2025-12-06 19:04.39 [info     ] TD3PlusBC_20251206181445: epoch=145 step=145000 epoch=145 metrics={'time_sample_batch': 0.005546986103057861, 'time_algorithm_update': 0.01324334454536438, 'critic_loss': 547461.373125, 'actor_loss': -2.4557400755882264, 'bc_loss': 0.04405223580449819, 'time_step': 0.019092544078826903, 'td_error': 46765.45908695175, 'value_scale': 3304.1989103510714, 'discounted_advantage': -4497.619153963115, 'initial_state': 2641.939453125, 'diff_eval': 1675.040581973368} step=145000
2025-12-06 19:04.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_145000.d3


Epoch 146/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.17it/s, critic_loss=5.9e+5, actor_loss=-2.46, bc_loss=0.0438]


2025-12-06 19:05.00 [info     ] TD3PlusBC_20251206181445: epoch=146 step=146000 epoch=146 metrics={'time_sample_batch': 0.004855505704879761, 'time_algorithm_update': 0.01204502820968628, 'critic_loss': 590763.077671875, 'actor_loss': -2.4559752435684206, 'bc_loss': 0.04379851441085338, 'time_step': 0.017155012130737304, 'td_error': 49545.65824809613, 'value_scale': 3429.478544991454, 'discounted_advantage': -4500.231680573249, 'initial_state': 2774.4443359375, 'diff_eval': 1433.619808620473} step=146000
2025-12-06 19:05.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_146000.d3


Epoch 147/200: 100%|██████████| 1000/1000 [00:16<00:00, 58.97it/s, critic_loss=6.44e+5, actor_loss=-2.46, bc_loss=0.0441]


2025-12-06 19:05.20 [info     ] TD3PlusBC_20251206181445: epoch=147 step=147000 epoch=147 metrics={'time_sample_batch': 0.004815162420272827, 'time_algorithm_update': 0.011558393239974976, 'critic_loss': 644555.81365625, 'actor_loss': -2.455676965236664, 'bc_loss': 0.04410920498520136, 'time_step': 0.016634991407394408, 'td_error': 55549.271026479226, 'value_scale': 3554.2427522158882, 'discounted_advantage': -4917.370695009657, 'initial_state': 2856.20458984375, 'diff_eval': 1637.2521896941728} step=147000
2025-12-06 19:05.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_147000.d3


Epoch 148/200: 100%|██████████| 1000/1000 [00:19<00:00, 51.02it/s, critic_loss=6.87e+5, actor_loss=-2.46, bc_loss=0.0441]


2025-12-06 19:05.43 [info     ] TD3PlusBC_20251206181445: epoch=148 step=148000 epoch=148 metrics={'time_sample_batch': 0.005811504364013672, 'time_algorithm_update': 0.013168505430221557, 'critic_loss': 688064.478953125, 'actor_loss': -2.4557080841064454, 'bc_loss': 0.04410433805733919, 'time_step': 0.01924628758430481, 'td_error': 52308.0464013012, 'value_scale': 3703.0204350614586, 'discounted_advantage': -4854.8980460236635, 'initial_state': 3004.219970703125, 'diff_eval': 1305.228549368886} step=148000
2025-12-06 19:05.44 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_148000.d3


Epoch 149/200: 100%|██████████| 1000/1000 [00:18<00:00, 53.84it/s, critic_loss=7.42e+5, actor_loss=-2.46, bc_loss=0.0444]


2025-12-06 19:06.05 [info     ] TD3PlusBC_20251206181445: epoch=149 step=149000 epoch=149 metrics={'time_sample_batch': 0.0049739575386047365, 'time_algorithm_update': 0.012953574180603028, 'critic_loss': 742983.389265625, 'actor_loss': -2.4554991903305052, 'bc_loss': 0.044355335704982284, 'time_step': 0.018205287218093873, 'td_error': 71131.16237086618, 'value_scale': 3860.6247949464323, 'discounted_advantage': -5501.885187233878, 'initial_state': 3116.588134765625, 'diff_eval': 1640.5527472530482} step=149000
2025-12-06 19:06.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_149000.d3


Epoch 150/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.41it/s, critic_loss=8.07e+5, actor_loss=-2.46, bc_loss=0.0443]


2025-12-06 19:06.26 [info     ] TD3PlusBC_20251206181445: epoch=150 step=150000 epoch=150 metrics={'time_sample_batch': 0.004770515203475953, 'time_algorithm_update': 0.01205218768119812, 'critic_loss': 805324.881859375, 'actor_loss': -2.455654640674591, 'bc_loss': 0.04424740486219526, 'time_step': 0.017090201616287232, 'td_error': 72756.22661198777, 'value_scale': 4034.955756009175, 'discounted_advantage': -5494.09193136544, 'initial_state': 3258.7001953125, 'diff_eval': 1842.2326128763941} step=150000
2025-12-06 19:06.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_150000.d3


Epoch 151/200: 100%|██████████| 1000/1000 [00:17<00:00, 55.86it/s, critic_loss=8.66e+5, actor_loss=-2.46, bc_loss=0.0439]


2025-12-06 19:06.47 [info     ] TD3PlusBC_20251206181445: epoch=151 step=151000 epoch=151 metrics={'time_sample_batch': 0.004773541212081909, 'time_algorithm_update': 0.011899879932403564, 'critic_loss': 867655.8793125, 'actor_loss': -2.456045501708984, 'bc_loss': 0.04386974766850472, 'time_step': 0.01756864619255066, 'td_error': 71440.31624166781, 'value_scale': 4183.3596927102435, 'discounted_advantage': -5631.9560564479525, 'initial_state': 3391.820556640625, 'diff_eval': 1404.538361721157} step=151000
2025-12-06 19:06.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_151000.d3


Epoch 152/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.32it/s, critic_loss=9.43e+5, actor_loss=-2.46, bc_loss=0.0442]


2025-12-06 19:07.08 [info     ] TD3PlusBC_20251206181445: epoch=152 step=152000 epoch=152 metrics={'time_sample_batch': 0.004722792625427246, 'time_algorithm_update': 0.011866405010223388, 'critic_loss': 942451.42065625, 'actor_loss': -2.4557890615463256, 'bc_loss': 0.04413789064437151, 'time_step': 0.016843721628189087, 'td_error': 85119.1803915843, 'value_scale': 4355.4282932585265, 'discounted_advantage': -5805.827135485609, 'initial_state': 3511.9638671875, 'diff_eval': 1728.2511203310835} step=152000
2025-12-06 19:07.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_152000.d3


Epoch 153/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.60it/s, critic_loss=1.02e+6, actor_loss=-2.46, bc_loss=0.0441]


2025-12-06 19:07.29 [info     ] TD3PlusBC_20251206181445: epoch=153 step=153000 epoch=153 metrics={'time_sample_batch': 0.004916114568710327, 'time_algorithm_update': 0.011889708042144776, 'critic_loss': 1020734.911625, 'actor_loss': -2.455827744960785, 'bc_loss': 0.044099539838731286, 'time_step': 0.01704928183555603, 'td_error': 93630.44462876358, 'value_scale': 4493.557846696059, 'discounted_advantage': -6328.430674995387, 'initial_state': 3622.051513671875, 'diff_eval': 1678.2622495166656} step=153000
2025-12-06 19:07.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_153000.d3


Epoch 154/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.32it/s, critic_loss=1.11e+6, actor_loss=-2.46, bc_loss=0.0441]


2025-12-06 19:07.49 [info     ] TD3PlusBC_20251206181445: epoch=154 step=154000 epoch=154 metrics={'time_sample_batch': 0.004666916131973267, 'time_algorithm_update': 0.011603054761886597, 'critic_loss': 1108313.376046875, 'actor_loss': -2.455935827255249, 'bc_loss': 0.04404774073511362, 'time_step': 0.016535566806793214, 'td_error': 105283.12148420804, 'value_scale': 4688.267433339172, 'discounted_advantage': -6679.020299082774, 'initial_state': 3791.801025390625, 'diff_eval': 1701.0743335294526} step=154000
2025-12-06 19:07.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_154000.d3


Epoch 155/200: 100%|██████████| 1000/1000 [00:17<00:00, 55.72it/s, critic_loss=1.18e+6, actor_loss=-2.46, bc_loss=0.0439]


2025-12-06 19:08.11 [info     ] TD3PlusBC_20251206181445: epoch=155 step=155000 epoch=155 metrics={'time_sample_batch': 0.0049300544261932375, 'time_algorithm_update': 0.012420944690704346, 'critic_loss': 1175985.604125, 'actor_loss': -2.4560892038345337, 'bc_loss': 0.04390056283399463, 'time_step': 0.017610371589660643, 'td_error': 109264.14845122975, 'value_scale': 4859.023275524053, 'discounted_advantage': -6895.902573654779, 'initial_state': 3986.654052734375, 'diff_eval': 1863.7670277194693} step=155000
2025-12-06 19:08.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_155000.d3


Epoch 156/200: 100%|██████████| 1000/1000 [00:17<00:00, 56.92it/s, critic_loss=1.25e+6, actor_loss=-2.46, bc_loss=0.044]


2025-12-06 19:08.32 [info     ] TD3PlusBC_20251206181445: epoch=156 step=156000 epoch=156 metrics={'time_sample_batch': 0.004874979496002197, 'time_algorithm_update': 0.012089200496673584, 'critic_loss': 1252686.35703125, 'actor_loss': -2.456039656639099, 'bc_loss': 0.043950446788221596, 'time_step': 0.01722540020942688, 'td_error': 116756.9719620895, 'value_scale': 5051.968712038486, 'discounted_advantage': -6851.781422453427, 'initial_state': 4131.5791015625, 'diff_eval': 2149.035735423025} step=156000
2025-12-06 19:08.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_156000.d3


Epoch 157/200: 100%|██████████| 1000/1000 [00:17<00:00, 56.15it/s, critic_loss=1.35e+6, actor_loss=-2.46, bc_loss=0.0443]


2025-12-06 19:08.53 [info     ] TD3PlusBC_20251206181445: epoch=157 step=157000 epoch=157 metrics={'time_sample_batch': 0.004735436916351319, 'time_algorithm_update': 0.012473755836486817, 'critic_loss': 1355143.94896875, 'actor_loss': -2.4556443972587587, 'bc_loss': 0.044342191725969315, 'time_step': 0.01747165560722351, 'td_error': 120691.45587755699, 'value_scale': 5201.956442795376, 'discounted_advantage': -7179.617210088801, 'initial_state': 4245.31201171875, 'diff_eval': 1856.168024059154} step=157000
2025-12-06 19:08.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_157000.d3


Epoch 158/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.13it/s, critic_loss=1.44e+6, actor_loss=-2.46, bc_loss=0.0449]


2025-12-06 19:09.14 [info     ] TD3PlusBC_20251206181445: epoch=158 step=158000 epoch=158 metrics={'time_sample_batch': 0.004758338212966919, 'time_algorithm_update': 0.01187327241897583, 'critic_loss': 1435280.5228125, 'actor_loss': -2.4551199345588683, 'bc_loss': 0.044871660970151425, 'time_step': 0.016894332885742186, 'td_error': 131210.39797782913, 'value_scale': 5437.534963475089, 'discounted_advantage': -7535.226905981606, 'initial_state': 4469.97802734375, 'diff_eval': 1896.1178823908133} step=158000
2025-12-06 19:09.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_158000.d3


Epoch 159/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.36it/s, critic_loss=1.57e+6, actor_loss=-2.46, bc_loss=0.0444]


2025-12-06 19:09.34 [info     ] TD3PlusBC_20251206181445: epoch=159 step=159000 epoch=159 metrics={'time_sample_batch': 0.004763022899627685, 'time_algorithm_update': 0.011798126935958862, 'critic_loss': 1569352.5483125, 'actor_loss': -2.4555869898796083, 'bc_loss': 0.04440797316282988, 'time_step': 0.016812993049621583, 'td_error': 126400.11809358033, 'value_scale': 5578.764139487178, 'discounted_advantage': -7567.793717994493, 'initial_state': 4580.798828125, 'diff_eval': 1370.5359938513848} step=159000
2025-12-06 19:09.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_159000.d3


Epoch 160/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.51it/s, critic_loss=1.66e+6, actor_loss=-2.46, bc_loss=0.0441]


2025-12-06 19:09.55 [info     ] TD3PlusBC_20251206181445: epoch=160 step=160000 epoch=160 metrics={'time_sample_batch': 0.004746484518051148, 'time_algorithm_update': 0.011775108814239502, 'critic_loss': 1656976.53846875, 'actor_loss': -2.4559486618041992, 'bc_loss': 0.04404908875375986, 'time_step': 0.016786163806915284, 'td_error': 139836.65884101312, 'value_scale': 5815.684095333128, 'discounted_advantage': -7876.358206787496, 'initial_state': 4783.7294921875, 'diff_eval': 1483.5691138708403} step=160000
2025-12-06 19:09.55 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_160000.d3


Epoch 161/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.61it/s, critic_loss=1.79e+6, actor_loss=-2.46, bc_loss=0.0447]


2025-12-06 19:10.15 [info     ] TD3PlusBC_20251206181445: epoch=161 step=161000 epoch=161 metrics={'time_sample_batch': 0.00460457706451416, 'time_algorithm_update': 0.011649567127227783, 'critic_loss': 1796751.9811875, 'actor_loss': -2.455290413856506, 'bc_loss': 0.04470815946906805, 'time_step': 0.016496909379959106, 'td_error': 166277.49181202834, 'value_scale': 6067.328671195581, 'discounted_advantage': -8177.036510654465, 'initial_state': 4999.6875, 'diff_eval': 1949.4180573610065} step=161000
2025-12-06 19:10.15 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_161000.d3


Epoch 162/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.76it/s, critic_loss=1.9e+6, actor_loss=-2.46, bc_loss=0.0443]


2025-12-06 19:10.36 [info     ] TD3PlusBC_20251206181445: epoch=162 step=162000 epoch=162 metrics={'time_sample_batch': 0.004852664232254028, 'time_algorithm_update': 0.011900946617126465, 'critic_loss': 1898245.1964375, 'actor_loss': -2.455714740753174, 'bc_loss': 0.04427882281690836, 'time_step': 0.017001382827758788, 'td_error': 158775.472577119, 'value_scale': 6270.350356735908, 'discounted_advantage': -8146.422035430662, 'initial_state': 5158.5048828125, 'diff_eval': 1548.799793247126} step=162000
2025-12-06 19:10.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_162000.d3


Epoch 163/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.69it/s, critic_loss=2.05e+6, actor_loss=-2.46, bc_loss=0.0445]


2025-12-06 19:10.56 [info     ] TD3PlusBC_20251206181445: epoch=163 step=163000 epoch=163 metrics={'time_sample_batch': 0.004729889392852783, 'time_algorithm_update': 0.011751489877700807, 'critic_loss': 2053486.270875, 'actor_loss': -2.4555340662002565, 'bc_loss': 0.04445816095918417, 'time_step': 0.01673608112335205, 'td_error': 180912.8335929055, 'value_scale': 6506.115463576592, 'discounted_advantage': -8919.5794806649, 'initial_state': 5348.869140625, 'diff_eval': 1690.9390461071678} step=163000
2025-12-06 19:10.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_163000.d3


Epoch 164/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.18it/s, critic_loss=2.24e+6, actor_loss=-2.45, bc_loss=0.0451]


2025-12-06 19:11.17 [info     ] TD3PlusBC_20251206181445: epoch=164 step=164000 epoch=164 metrics={'time_sample_batch': 0.004755128145217896, 'time_algorithm_update': 0.01187704062461853, 'critic_loss': 2242288.1629375, 'actor_loss': -2.4549364438056944, 'bc_loss': 0.04506308320164681, 'time_step': 0.01688464164733887, 'td_error': 171605.78533186467, 'value_scale': 6716.793369852787, 'discounted_advantage': -8941.215855382972, 'initial_state': 5571.08984375, 'diff_eval': 1437.3847361562582} step=164000
2025-12-06 19:11.17 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_164000.d3


Epoch 165/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.65it/s, critic_loss=2.38e+6, actor_loss=-2.46, bc_loss=0.0444]


2025-12-06 19:11.37 [info     ] TD3PlusBC_20251206181445: epoch=165 step=165000 epoch=165 metrics={'time_sample_batch': 0.0046664118766784664, 'time_algorithm_update': 0.011782115697860718, 'critic_loss': 2378202.26396875, 'actor_loss': -2.4555600519180296, 'bc_loss': 0.04443600280582905, 'time_step': 0.016710237979888916, 'td_error': 202196.13643520602, 'value_scale': 6990.468367110554, 'discounted_advantage': -9485.687789122285, 'initial_state': 5800.646484375, 'diff_eval': 1664.7285093995047} step=165000
2025-12-06 19:11.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_165000.d3


Epoch 166/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.62it/s, critic_loss=2.51e+6, actor_loss=-2.46, bc_loss=0.0444]


2025-12-06 19:11.58 [info     ] TD3PlusBC_20251206181445: epoch=166 step=166000 epoch=166 metrics={'time_sample_batch': 0.004821807861328125, 'time_algorithm_update': 0.011960350513458251, 'critic_loss': 2510671.3579375, 'actor_loss': -2.4556138858795165, 'bc_loss': 0.04438360009342432, 'time_step': 0.017046064853668212, 'td_error': 200550.30830244694, 'value_scale': 7211.368949896532, 'discounted_advantage': -9631.849787272808, 'initial_state': 5981.9345703125, 'diff_eval': 1453.6311202812776} step=166000
2025-12-06 19:11.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_166000.d3


Epoch 167/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.35it/s, critic_loss=2.67e+6, actor_loss=-2.46, bc_loss=0.0447]


2025-12-06 19:12.18 [info     ] TD3PlusBC_20251206181445: epoch=167 step=167000 epoch=167 metrics={'time_sample_batch': 0.0046000869274139405, 'time_algorithm_update': 0.011690969467163087, 'critic_loss': 2673141.4431875, 'actor_loss': -2.4552970476150513, 'bc_loss': 0.04469583367556334, 'time_step': 0.016543553352355956, 'td_error': 230629.93476580083, 'value_scale': 7433.103837735095, 'discounted_advantage': -10171.094815230632, 'initial_state': 6162.00390625, 'diff_eval': 1529.2057310727016} step=167000
2025-12-06 19:12.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_167000.d3


Epoch 168/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.89it/s, critic_loss=2.91e+6, actor_loss=-2.46, bc_loss=0.0445]


2025-12-06 19:12.38 [info     ] TD3PlusBC_20251206181445: epoch=168 step=168000 epoch=168 metrics={'time_sample_batch': 0.004543191909790039, 'time_algorithm_update': 0.011638981819152832, 'critic_loss': 2907223.8325, 'actor_loss': -2.455474713802338, 'bc_loss': 0.04451630605757236, 'time_step': 0.016414096355438234, 'td_error': 252451.90629773852, 'value_scale': 7690.039068230039, 'discounted_advantage': -10557.71448003986, 'initial_state': 6354.95556640625, 'diff_eval': 1553.9158312403226} step=168000
2025-12-06 19:12.38 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_168000.d3


Epoch 169/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.05it/s, critic_loss=3.14e+6, actor_loss=-2.46, bc_loss=0.0446]


2025-12-06 19:12.59 [info     ] TD3PlusBC_20251206181445: epoch=169 step=169000 epoch=169 metrics={'time_sample_batch': 0.004736858129501343, 'time_algorithm_update': 0.011923449993133545, 'critic_loss': 3141391.8720625, 'actor_loss': -2.4553556571006774, 'bc_loss': 0.04462137681990862, 'time_step': 0.01691804885864258, 'td_error': 266157.22817241057, 'value_scale': 7992.145575959045, 'discounted_advantage': -10827.67760700563, 'initial_state': 6641.7763671875, 'diff_eval': 1651.6281215439383} step=169000
2025-12-06 19:12.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_169000.d3


Epoch 170/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.45it/s, critic_loss=3.32e+6, actor_loss=-2.46, bc_loss=0.0446]


2025-12-06 19:13.19 [info     ] TD3PlusBC_20251206181445: epoch=170 step=170000 epoch=170 metrics={'time_sample_batch': 0.004699771881103516, 'time_algorithm_update': 0.011869747638702393, 'critic_loss': 3329101.9589375, 'actor_loss': -2.4553935718536377, 'bc_loss': 0.044588442854583264, 'time_step': 0.01679700756072998, 'td_error': 307172.41429971193, 'value_scale': 8248.69906240177, 'discounted_advantage': -11526.137701497226, 'initial_state': 6877.06396484375, 'diff_eval': 1664.744817719944} step=170000
2025-12-06 19:13.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_170000.d3


Epoch 171/200: 100%|██████████| 1000/1000 [00:16<00:00, 60.32it/s, critic_loss=3.58e+6, actor_loss=-2.46, bc_loss=0.0446]


2025-12-06 19:13.39 [info     ] TD3PlusBC_20251206181445: epoch=171 step=171000 epoch=171 metrics={'time_sample_batch': 0.004551589488983154, 'time_algorithm_update': 0.011433768272399902, 'critic_loss': 3583270.105375, 'actor_loss': -2.455342695236206, 'bc_loss': 0.04463650507479906, 'time_step': 0.01624538803100586, 'td_error': 309200.5176195019, 'value_scale': 8572.516662546495, 'discounted_advantage': -11572.045389972918, 'initial_state': 7140.4013671875, 'diff_eval': 1601.7303721321694} step=171000
2025-12-06 19:13.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_171000.d3


Epoch 172/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.86it/s, critic_loss=3.87e+6, actor_loss=-2.46, bc_loss=0.045]


2025-12-06 19:14.00 [info     ] TD3PlusBC_20251206181445: epoch=172 step=172000 epoch=172 metrics={'time_sample_batch': 0.004760876417160034, 'time_algorithm_update': 0.011957081317901611, 'critic_loss': 3876666.156, 'actor_loss': -2.4550294466018676, 'bc_loss': 0.04494840305298567, 'time_step': 0.016955966234207153, 'td_error': 318326.2553245504, 'value_scale': 8864.951987587097, 'discounted_advantage': -11990.99266272509, 'initial_state': 7416.79931640625, 'diff_eval': 1683.587942681707} step=172000
2025-12-06 19:14.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_172000.d3


Epoch 173/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.35it/s, critic_loss=4.08e+6, actor_loss=-2.46, bc_loss=0.0447]


2025-12-06 19:14.20 [info     ] TD3PlusBC_20251206181445: epoch=173 step=173000 epoch=173 metrics={'time_sample_batch': 0.004545186519622803, 'time_algorithm_update': 0.011769040107727051, 'critic_loss': 4080323.298, 'actor_loss': -2.4552875657081605, 'bc_loss': 0.044679664395749566, 'time_step': 0.016548494815826417, 'td_error': 336790.24102587195, 'value_scale': 9166.264300542225, 'discounted_advantage': -12309.405699097364, 'initial_state': 7672.35986328125, 'diff_eval': 1579.142223373317} step=173000
2025-12-06 19:14.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_173000.d3


Epoch 174/200: 100%|██████████| 1000/1000 [00:17<00:00, 56.76it/s, critic_loss=4.38e+6, actor_loss=-2.46, bc_loss=0.0446]


2025-12-06 19:14.41 [info     ] TD3PlusBC_20251206181445: epoch=174 step=174000 epoch=174 metrics={'time_sample_batch': 0.004937235593795776, 'time_algorithm_update': 0.012111416816711426, 'critic_loss': 4380629.091875, 'actor_loss': -2.455390416145325, 'bc_loss': 0.04456331990659237, 'time_step': 0.017299085378646852, 'td_error': 348621.9079629004, 'value_scale': 9429.096040992705, 'discounted_advantage': -12504.014368464535, 'initial_state': 7884.15869140625, 'diff_eval': 1543.4905362222637} step=174000
2025-12-06 19:14.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_174000.d3


Epoch 175/200: 100%|██████████| 1000/1000 [00:17<00:00, 56.32it/s, critic_loss=4.66e+6, actor_loss=-2.45, bc_loss=0.045]


2025-12-06 19:15.02 [info     ] TD3PlusBC_20251206181445: epoch=175 step=175000 epoch=175 metrics={'time_sample_batch': 0.004935459613800049, 'time_algorithm_update': 0.012256761312484741, 'critic_loss': 4662823.180875, 'actor_loss': -2.454963806629181, 'bc_loss': 0.0449824363514781, 'time_step': 0.01743380832672119, 'td_error': 402065.1192942145, 'value_scale': 9759.629682056462, 'discounted_advantage': -13318.935364716815, 'initial_state': 8152.50244140625, 'diff_eval': 1619.7463649647857} step=175000
2025-12-06 19:15.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_175000.d3


Epoch 176/200: 100%|██████████| 1000/1000 [00:17<00:00, 55.91it/s, critic_loss=5.01e+6, actor_loss=-2.45, bc_loss=0.0451]


2025-12-06 19:15.24 [info     ] TD3PlusBC_20251206181445: epoch=176 step=176000 epoch=176 metrics={'time_sample_batch': 0.004855273246765136, 'time_algorithm_update': 0.012460476398468017, 'critic_loss': 5019355.599, 'actor_loss': -2.454869842529297, 'bc_loss': 0.045066353999078275, 'time_step': 0.017560702800750734, 'td_error': 415190.544604042, 'value_scale': 10080.96757779757, 'discounted_advantage': -13456.747155091663, 'initial_state': 8404.29296875, 'diff_eval': 1549.7004448440994} step=176000
2025-12-06 19:15.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_176000.d3


Epoch 177/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.16it/s, critic_loss=5.31e+6, actor_loss=-2.46, bc_loss=0.0445]


2025-12-06 19:15.44 [info     ] TD3PlusBC_20251206181445: epoch=177 step=177000 epoch=177 metrics={'time_sample_batch': 0.004577057361602783, 'time_algorithm_update': 0.011787873506546021, 'critic_loss': 5309036.865625, 'actor_loss': -2.4554367480278017, 'bc_loss': 0.044465531803667545, 'time_step': 0.016605218410491944, 'td_error': 472654.13974689087, 'value_scale': 10526.592050429263, 'discounted_advantage': -14293.552038244106, 'initial_state': 8824.3984375, 'diff_eval': 1752.35293573334} step=177000
2025-12-06 19:15.44 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_177000.d3


Epoch 178/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.79it/s, critic_loss=5.63e+6, actor_loss=-2.46, bc_loss=0.0447]


2025-12-06 19:16.04 [info     ] TD3PlusBC_20251206181445: epoch=178 step=178000 epoch=178 metrics={'time_sample_batch': 0.004643857479095459, 'time_algorithm_update': 0.011826302766799926, 'critic_loss': 5636982.037, 'actor_loss': -2.4552647409439086, 'bc_loss': 0.044657046742737294, 'time_step': 0.016720619678497313, 'td_error': 523446.3379445811, 'value_scale': 10845.24593351353, 'discounted_advantage': -14890.023142872798, 'initial_state': 9094.9892578125, 'diff_eval': 1882.415042985473} step=178000
2025-12-06 19:16.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_178000.d3


Epoch 179/200: 100%|██████████| 1000/1000 [00:16<00:00, 61.38it/s, critic_loss=6.21e+6, actor_loss=-2.46, bc_loss=0.0448]


2025-12-06 19:16.24 [info     ] TD3PlusBC_20251206181445: epoch=179 step=179000 epoch=179 metrics={'time_sample_batch': 0.004438817739486694, 'time_algorithm_update': 0.011280606031417846, 'critic_loss': 6208234.148875, 'actor_loss': -2.455103891372681, 'bc_loss': 0.044808097243309024, 'time_step': 0.015983893632888792, 'td_error': 532330.9491337192, 'value_scale': 11243.421149331387, 'discounted_advantage': -15125.021436834933, 'initial_state': 9485.73828125, 'diff_eval': 1923.4672529546845} step=179000
2025-12-06 19:16.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_179000.d3


Epoch 180/200: 100%|██████████| 1000/1000 [00:16<00:00, 59.93it/s, critic_loss=6.42e+6, actor_loss=-2.45, bc_loss=0.0457]


2025-12-06 19:16.44 [info     ] TD3PlusBC_20251206181445: epoch=180 step=180000 epoch=180 metrics={'time_sample_batch': 0.0045908458232879635, 'time_algorithm_update': 0.011543193101882935, 'critic_loss': 6426511.1491875, 'actor_loss': -2.4541622219085695, 'bc_loss': 0.04571735917776823, 'time_step': 0.016384658098220824, 'td_error': 539502.756625789, 'value_scale': 11581.24661886722, 'discounted_advantage': -15438.180912001628, 'initial_state': 9740.794921875, 'diff_eval': 1662.252743750945} step=180000
2025-12-06 19:16.45 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_180000.d3


Epoch 181/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.99it/s, critic_loss=6.9e+6, actor_loss=-2.45, bc_loss=0.045] 


2025-12-06 19:17.05 [info     ] TD3PlusBC_20251206181445: epoch=181 step=181000 epoch=181 metrics={'time_sample_batch': 0.004813645124435425, 'time_algorithm_update': 0.011864101886749268, 'critic_loss': 6894662.4015, 'actor_loss': -2.4548395590782164, 'bc_loss': 0.045029711201787, 'time_step': 0.01693930983543396, 'td_error': 541233.4114837989, 'value_scale': 11991.604847859257, 'discounted_advantage': -15593.839816367336, 'initial_state': 10153.443359375, 'diff_eval': 1591.5904709524034} step=181000
2025-12-06 19:17.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_181000.d3


Epoch 182/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.92it/s, critic_loss=7.51e+6, actor_loss=-2.45, bc_loss=0.0453]


2025-12-06 19:17.26 [info     ] TD3PlusBC_20251206181445: epoch=182 step=182000 epoch=182 metrics={'time_sample_batch': 0.004751147985458374, 'time_algorithm_update': 0.0119478600025177, 'critic_loss': 7502281.66025, 'actor_loss': -2.454508167266846, 'bc_loss': 0.04532287399470806, 'time_step': 0.016943723917007446, 'td_error': 689273.2700967859, 'value_scale': 12472.855142956307, 'discounted_advantage': -17250.363541432074, 'initial_state': 10572.59765625, 'diff_eval': 1802.6943692058846} step=182000
2025-12-06 19:17.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_182000.d3


Epoch 183/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.79it/s, critic_loss=7.88e+6, actor_loss=-2.46, bc_loss=0.0446]


2025-12-06 19:17.46 [info     ] TD3PlusBC_20251206181445: epoch=183 step=183000 epoch=183 metrics={'time_sample_batch': 0.0047735700607299805, 'time_algorithm_update': 0.011969380855560303, 'critic_loss': 7888191.5175, 'actor_loss': -2.4552146196365356, 'bc_loss': 0.04463872921466827, 'time_step': 0.016993589401245117, 'td_error': 680998.633273271, 'value_scale': 12796.274915850272, 'discounted_advantage': -17397.172141924177, 'initial_state': 10869.419921875, 'diff_eval': 1697.483237469533} step=183000
2025-12-06 19:17.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_183000.d3


Epoch 184/200: 100%|██████████| 1000/1000 [00:17<00:00, 56.58it/s, critic_loss=8.48e+6, actor_loss=-2.45, bc_loss=0.0447]


2025-12-06 19:18.07 [info     ] TD3PlusBC_20251206181445: epoch=184 step=184000 epoch=184 metrics={'time_sample_batch': 0.00481567645072937, 'time_algorithm_update': 0.012276456117630005, 'critic_loss': 8480619.262375, 'actor_loss': -2.454992884159088, 'bc_loss': 0.044734858483076095, 'time_step': 0.017345359802246094, 'td_error': 656378.1358812968, 'value_scale': 13248.29996062644, 'discounted_advantage': -16701.879606509054, 'initial_state': 11250.3564453125, 'diff_eval': 1431.7320869063817} step=184000
2025-12-06 19:18.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_184000.d3


Epoch 185/200: 100%|██████████| 1000/1000 [00:16<00:00, 58.94it/s, critic_loss=9.02e+6, actor_loss=-2.45, bc_loss=0.0455]


2025-12-06 19:18.28 [info     ] TD3PlusBC_20251206181445: epoch=185 step=185000 epoch=185 metrics={'time_sample_batch': 0.004636446475982666, 'time_algorithm_update': 0.011796449661254883, 'critic_loss': 9017667.7725, 'actor_loss': -2.4542323417663576, 'bc_loss': 0.04550299161672592, 'time_step': 0.016663410663604737, 'td_error': 841022.2333975073, 'value_scale': 13732.332983664475, 'discounted_advantage': -18786.283894295884, 'initial_state': 11637.2236328125, 'diff_eval': 1840.4941305394289} step=185000
2025-12-06 19:18.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_185000.d3


Epoch 186/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.90it/s, critic_loss=9.61e+6, actor_loss=-2.45, bc_loss=0.0456]


2025-12-06 19:18.48 [info     ] TD3PlusBC_20251206181445: epoch=186 step=186000 epoch=186 metrics={'time_sample_batch': 0.0047824993133544925, 'time_algorithm_update': 0.011909069299697875, 'critic_loss': 9606191.6185, 'actor_loss': -2.454112245082855, 'bc_loss': 0.045587777465581895, 'time_step': 0.016944928407669067, 'td_error': 786498.2699170121, 'value_scale': 14225.1327981749, 'discounted_advantage': -18677.42083257518, 'initial_state': 12140.5341796875, 'diff_eval': 1517.366021172973} step=186000
2025-12-06 19:18.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_186000.d3


Epoch 187/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.74it/s, critic_loss=1.05e+7, actor_loss=-2.45, bc_loss=0.046]


2025-12-06 19:19.09 [info     ] TD3PlusBC_20251206181445: epoch=187 step=187000 epoch=187 metrics={'time_sample_batch': 0.004764683961868286, 'time_algorithm_update': 0.011968739271163941, 'critic_loss': 10470896.8065, 'actor_loss': -2.453703468322754, 'bc_loss': 0.04600385481119156, 'time_step': 0.016989547729492186, 'td_error': 904301.3835284205, 'value_scale': 14798.79974820568, 'discounted_advantage': -19858.229742629035, 'initial_state': 12555.474609375, 'diff_eval': 1592.055759329602} step=187000
2025-12-06 19:19.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_187000.d3


Epoch 188/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.57it/s, critic_loss=1.12e+7, actor_loss=-2.45, bc_loss=0.0465]


2025-12-06 19:19.30 [info     ] TD3PlusBC_20251206181445: epoch=188 step=188000 epoch=188 metrics={'time_sample_batch': 0.004689935445785522, 'time_algorithm_update': 0.011824605703353882, 'critic_loss': 11229848.60475, 'actor_loss': -2.4532066459655764, 'bc_loss': 0.04651305408030748, 'time_step': 0.016762977600097657, 'td_error': 1191472.3273905832, 'value_scale': 15425.339990275304, 'discounted_advantage': -21825.773496493737, 'initial_state': 13159.2685546875, 'diff_eval': 2348.8294477287463} step=188000
2025-12-06 19:19.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_188000.d3


Epoch 189/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.37it/s, critic_loss=1.19e+7, actor_loss=-2.45, bc_loss=0.0459]


2025-12-06 19:19.50 [info     ] TD3PlusBC_20251206181445: epoch=189 step=189000 epoch=189 metrics={'time_sample_batch': 0.004725766658782959, 'time_algorithm_update': 0.011855091571807861, 'critic_loss': 11855575.28075, 'actor_loss': -2.453741928577423, 'bc_loss': 0.04591945710033178, 'time_step': 0.016825146675109863, 'td_error': 1168798.2416964571, 'value_scale': 15975.058949831046, 'discounted_advantage': -21918.755464165206, 'initial_state': 13539.412109375, 'diff_eval': 1854.841352502418} step=189000
2025-12-06 19:19.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_189000.d3


Epoch 190/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.72it/s, critic_loss=1.28e+7, actor_loss=-2.45, bc_loss=0.0471]


2025-12-06 19:20.11 [info     ] TD3PlusBC_20251206181445: epoch=190 step=190000 epoch=190 metrics={'time_sample_batch': 0.004790985345840454, 'time_algorithm_update': 0.011949659824371338, 'critic_loss': 12844058.2345, 'actor_loss': -2.4526341729164125, 'bc_loss': 0.047081521295011045, 'time_step': 0.017002955675125123, 'td_error': 1151980.0433832973, 'value_scale': 16569.779195371437, 'discounted_advantage': -21951.490931360935, 'initial_state': 14207.5654296875, 'diff_eval': 1636.1937296974} step=190000
2025-12-06 19:20.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_190000.d3


Epoch 191/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.49it/s, critic_loss=1.36e+7, actor_loss=-2.45, bc_loss=0.046]


2025-12-06 19:20.31 [info     ] TD3PlusBC_20251206181445: epoch=191 step=191000 epoch=191 metrics={'time_sample_batch': 0.004673400402069092, 'time_algorithm_update': 0.011849964618682861, 'critic_loss': 13642708.5825, 'actor_loss': -2.4536275854110716, 'bc_loss': 0.04603211513906717, 'time_step': 0.016778467893600463, 'td_error': 1296042.55046062, 'value_scale': 17196.018802716368, 'discounted_advantage': -23050.99928021674, 'initial_state': 14615.4404296875, 'diff_eval': 1838.6186270933845} step=191000
2025-12-06 19:20.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_191000.d3


Epoch 192/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.96it/s, critic_loss=1.47e+7, actor_loss=-2.45, bc_loss=0.0461]


2025-12-06 19:20.52 [info     ] TD3PlusBC_20251206181445: epoch=192 step=192000 epoch=192 metrics={'time_sample_batch': 0.004723456859588623, 'time_algorithm_update': 0.01194715690612793, 'critic_loss': 14744574.945, 'actor_loss': -2.453502362728119, 'bc_loss': 0.04608088409900665, 'time_step': 0.01691792321205139, 'td_error': 1386654.4187852312, 'value_scale': 17847.8450593141, 'discounted_advantage': -23673.077556479777, 'initial_state': 15311.4326171875, 'diff_eval': 1881.092973918042} step=192000
2025-12-06 19:20.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_192000.d3


Epoch 193/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.74it/s, critic_loss=1.58e+7, actor_loss=-2.45, bc_loss=0.0471]


2025-12-06 19:21.12 [info     ] TD3PlusBC_20251206181445: epoch=193 step=193000 epoch=193 metrics={'time_sample_batch': 0.004649658679962158, 'time_algorithm_update': 0.011789174079895019, 'critic_loss': 15826882.87975, 'actor_loss': -2.452488217830658, 'bc_loss': 0.04705347542464733, 'time_step': 0.016697521686553956, 'td_error': 1473201.2390189164, 'value_scale': 18591.375038882412, 'discounted_advantage': -24464.78789174957, 'initial_state': 15936.72265625, 'diff_eval': 1665.3101179556866} step=193000
2025-12-06 19:21.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_193000.d3


Epoch 194/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.46it/s, critic_loss=1.72e+7, actor_loss=-2.45, bc_loss=0.0472]


2025-12-06 19:21.33 [info     ] TD3PlusBC_20251206181445: epoch=194 step=194000 epoch=194 metrics={'time_sample_batch': 0.00479469633102417, 'time_algorithm_update': 0.012031455993652344, 'critic_loss': 17154057.375, 'actor_loss': -2.4523336777687073, 'bc_loss': 0.047210962690413, 'time_step': 0.017087958335876466, 'td_error': 1820226.591103697, 'value_scale': 19449.730648836965, 'discounted_advantage': -25532.943760101203, 'initial_state': 16623.66796875, 'diff_eval': 2004.1652370305621} step=194000
2025-12-06 19:21.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_194000.d3


Epoch 195/200: 100%|██████████| 1000/1000 [00:16<00:00, 58.92it/s, critic_loss=1.83e+7, actor_loss=-2.45, bc_loss=0.047]


2025-12-06 19:21.54 [info     ] TD3PlusBC_20251206181445: epoch=195 step=195000 epoch=195 metrics={'time_sample_batch': 0.00465505599975586, 'time_algorithm_update': 0.011762554883956909, 'critic_loss': 18340120.545, 'actor_loss': -2.452368408679962, 'bc_loss': 0.04705588553100824, 'time_step': 0.01665615916252136, 'td_error': 1806898.2206525747, 'value_scale': 20214.049368795186, 'discounted_advantage': -25953.812548411763, 'initial_state': 17396.44921875, 'diff_eval': 1804.7997677175824} step=195000
2025-12-06 19:21.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_195000.d3


Epoch 196/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.55it/s, critic_loss=1.95e+7, actor_loss=-2.45, bc_loss=0.0476]


2025-12-06 19:22.15 [info     ] TD3PlusBC_20251206181445: epoch=196 step=196000 epoch=196 metrics={'time_sample_batch': 0.00486417818069458, 'time_algorithm_update': 0.011933507680892944, 'critic_loss': 19531026.66575, 'actor_loss': -2.4518461089134216, 'bc_loss': 0.04764411050826311, 'time_step': 0.01705004906654358, 'td_error': 1836210.6239400897, 'value_scale': 21024.558793892105, 'discounted_advantage': -27449.272849134253, 'initial_state': 18218.01171875, 'diff_eval': 1516.5514778022718} step=196000
2025-12-06 19:22.15 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_196000.d3


Epoch 197/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.51it/s, critic_loss=2.15e+7, actor_loss=-2.45, bc_loss=0.0485]


2025-12-06 19:22.35 [info     ] TD3PlusBC_20251206181445: epoch=197 step=197000 epoch=197 metrics={'time_sample_batch': 0.004717526912689209, 'time_algorithm_update': 0.011803137063980102, 'critic_loss': 21484619.1075, 'actor_loss': -2.45096147108078, 'bc_loss': 0.04847488334029913, 'time_step': 0.016768671989440917, 'td_error': 2215289.8167084195, 'value_scale': 21898.517583036464, 'discounted_advantage': -29405.28829364109, 'initial_state': 19001.421875, 'diff_eval': 1938.9112856465908} step=197000
2025-12-06 19:22.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_197000.d3


Epoch 198/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.62it/s, critic_loss=2.29e+7, actor_loss=-2.45, bc_loss=0.0483]


2025-12-06 19:22.56 [info     ] TD3PlusBC_20251206181445: epoch=198 step=198000 epoch=198 metrics={'time_sample_batch': 0.004683259010314941, 'time_algorithm_update': 0.01178587293624878, 'critic_loss': 22881316.445, 'actor_loss': -2.451152997493744, 'bc_loss': 0.04833477617055178, 'time_step': 0.016740588188171385, 'td_error': 2396860.319044979, 'value_scale': 22747.069172222076, 'discounted_advantage': -30921.2530405352, 'initial_state': 19853.83203125, 'diff_eval': 1599.7676756552016} step=198000
2025-12-06 19:22.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_198000.d3


Epoch 199/200: 100%|██████████| 1000/1000 [00:17<00:00, 57.18it/s, critic_loss=2.45e+7, actor_loss=-2.45, bc_loss=0.0482]


2025-12-06 19:23.16 [info     ] TD3PlusBC_20251206181445: epoch=199 step=199000 epoch=199 metrics={'time_sample_batch': 0.0048705217838287355, 'time_algorithm_update': 0.012025310754776001, 'critic_loss': 24497952.9305, 'actor_loss': -2.4512781167030333, 'bc_loss': 0.04819663402438164, 'time_step': 0.01717294430732727, 'td_error': 2365853.1191255534, 'value_scale': 23678.450250975744, 'discounted_advantage': -30622.10712483153, 'initial_state': 20714.232421875, 'diff_eval': 1605.8484106944557} step=199000
2025-12-06 19:23.17 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_199000.d3


Epoch 200/200: 100%|██████████| 1000/1000 [00:17<00:00, 58.39it/s, critic_loss=2.67e+7, actor_loss=-2.45, bc_loss=0.049]


2025-12-06 19:23.37 [info     ] TD3PlusBC_20251206181445: epoch=200 step=200000 epoch=200 metrics={'time_sample_batch': 0.004709884881973266, 'time_algorithm_update': 0.011847317457199096, 'critic_loss': 26759461.6395, 'actor_loss': -2.4505009422302244, 'bc_loss': 0.04899658942222595, 'time_step': 0.01681410551071167, 'td_error': 2818645.8020374454, 'value_scale': 24669.374190427232, 'discounted_advantage': -33241.80945343681, 'initial_state': 21610.83203125, 'diff_eval': 1682.0906646754047} step=200000
2025-12-06 19:23.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\TD3PlusBC_20251206181445\model_200000.d3
Training model:  PRDC
2025-12-06 19:23.37 [info     ] dataset info                   dataset_info=DatasetInfo(observation_signature=Signature(dtype=[dtype('float64')], shape=[(7,)]), action_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.CONTINUOUS: 1>, action_siz

Epoch 1/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.88it/s, critic_loss=0.218, actor_loss=-2.33, dc_loss=0.0431]


2025-12-06 19:24.21 [info     ] PRDC_20251206192337: epoch=1 step=1000 epoch=1 metrics={'time_sample_batch': 0.005058232069015503, 'time_algorithm_update': 0.03437108635902405, 'critic_loss': 0.2190002474859357, 'actor_loss': -2.32768030166626, 'dc_loss': 0.04312181412428617, 'time_step': 0.039695283889770505, 'td_error': 0.7435417495912549, 'value_scale': 1.697208230996509, 'discounted_advantage': -2.1598098657237355, 'initial_state': 1.5161300897598267, 'diff_eval': 5009.25369867643} step=1000
2025-12-06 19:24.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_1000.d3


Epoch 2/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.12it/s, critic_loss=0.629, actor_loss=-2.3, dc_loss=0.0316]


2025-12-06 19:25.05 [info     ] PRDC_20251206192337: epoch=2 step=2000 epoch=2 metrics={'time_sample_batch': 0.004962193965911865, 'time_algorithm_update': 0.03410360407829285, 'critic_loss': 0.6324881958067418, 'actor_loss': -2.3014931964874266, 'dc_loss': 0.03151534645631909, 'time_step': 0.03930639338493347, 'td_error': 1.0798907810962755, 'value_scale': 2.8732976596240403, 'discounted_advantage': -3.3307141876560338, 'initial_state': 2.6504855155944824, 'diff_eval': 3440.5177157820804} step=2000
2025-12-06 19:25.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_2000.d3


Epoch 3/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.95it/s, critic_loss=1.43, actor_loss=-2.26, dc_loss=0.0228]


2025-12-06 19:25.48 [info     ] PRDC_20251206192337: epoch=3 step=3000 epoch=3 metrics={'time_sample_batch': 0.005139575004577636, 'time_algorithm_update': 0.03414276075363159, 'critic_loss': 1.4333358820080757, 'actor_loss': -2.264759135246277, 'dc_loss': 0.022782639011740685, 'time_step': 0.03953363513946533, 'td_error': 1.3590738457247282, 'value_scale': 3.5800248000369552, 'discounted_advantage': -4.298658678369133, 'initial_state': 2.8570849895477295, 'diff_eval': 3523.1548644717773} step=3000
2025-12-06 19:25.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_3000.d3


Epoch 4/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.26it/s, critic_loss=2.58, actor_loss=-2.25, dc_loss=0.0202]


2025-12-06 19:26.31 [info     ] PRDC_20251206192337: epoch=4 step=4000 epoch=4 metrics={'time_sample_batch': 0.004978881120681762, 'time_algorithm_update': 0.03382554888725281, 'critic_loss': 2.5849703254699707, 'actor_loss': -2.2546823234558104, 'dc_loss': 0.020173475332558154, 'time_step': 0.03906081008911133, 'td_error': 1.441968516943579, 'value_scale': 4.028374919640517, 'discounted_advantage': -4.22772068326048, 'initial_state': 3.337689161300659, 'diff_eval': 3675.719193460274} step=4000
2025-12-06 19:26.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_4000.d3


Epoch 5/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.82it/s, critic_loss=3.61, actor_loss=-2.25, dc_loss=0.0201]


2025-12-06 19:27.16 [info     ] PRDC_20251206192337: epoch=5 step=5000 epoch=5 metrics={'time_sample_batch': 0.005900088310241699, 'time_algorithm_update': 0.035274881601333616, 'critic_loss': 3.620323373913765, 'actor_loss': -2.2512552080154418, 'dc_loss': 0.020130293641239406, 'time_step': 0.041434492588043215, 'td_error': 1.508542900643418, 'value_scale': 4.028637175572364, 'discounted_advantage': -4.996840312381302, 'initial_state': 3.3843133449554443, 'diff_eval': 3617.1253589516014} step=5000
2025-12-06 19:27.17 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_5000.d3


Epoch 6/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.11it/s, critic_loss=4.23, actor_loss=-2.26, dc_loss=0.0219]


2025-12-06 19:28.00 [info     ] PRDC_20251206192337: epoch=6 step=6000 epoch=6 metrics={'time_sample_batch': 0.0049925785064697265, 'time_algorithm_update': 0.03406472802162171, 'critic_loss': 4.237074507236481, 'actor_loss': -2.2582307114601137, 'dc_loss': 0.02188478128425777, 'time_step': 0.03930769157409668, 'td_error': 1.747093054853658, 'value_scale': 4.317455814105255, 'discounted_advantage': -5.4731171726978785, 'initial_state': 3.0821759700775146, 'diff_eval': 3929.6713989691884} step=6000
2025-12-06 19:28.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_6000.d3


Epoch 7/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.28it/s, critic_loss=4.7, actor_loss=-2.27, dc_loss=0.0231]


2025-12-06 19:28.43 [info     ] PRDC_20251206192337: epoch=7 step=7000 epoch=7 metrics={'time_sample_batch': 0.004911082744598389, 'time_algorithm_update': 0.033863556623458864, 'critic_loss': 4.70240398812294, 'actor_loss': -2.272461298942566, 'dc_loss': 0.023123271107673645, 'time_step': 0.03902883315086365, 'td_error': 1.8980794322505674, 'value_scale': 4.716080163971639, 'discounted_advantage': -5.25457454330027, 'initial_state': 3.55521297454834, 'diff_eval': 3952.8171919013794} step=7000
2025-12-06 19:28.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_7000.d3


Epoch 8/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.02it/s, critic_loss=5.25, actor_loss=-2.29, dc_loss=0.025]


2025-12-06 19:29.26 [info     ] PRDC_20251206192337: epoch=8 step=8000 epoch=8 metrics={'time_sample_batch': 0.004977713346481324, 'time_algorithm_update': 0.034213604211807254, 'critic_loss': 5.254804892063141, 'actor_loss': -2.2890680708885194, 'dc_loss': 0.02505377122759819, 'time_step': 0.03945787215232849, 'td_error': 2.2568234653422508, 'value_scale': 5.382565862821588, 'discounted_advantage': -6.714520392711609, 'initial_state': 3.534311532974243, 'diff_eval': 3888.2451662702792} step=8000
2025-12-06 19:29.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_8000.d3


Epoch 9/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.16it/s, critic_loss=6.09, actor_loss=-2.31, dc_loss=0.0253]


2025-12-06 19:30.09 [info     ] PRDC_20251206192337: epoch=9 step=9000 epoch=9 metrics={'time_sample_batch': 0.005048693418502808, 'time_algorithm_update': 0.03394011235237122, 'critic_loss': 6.088509802818298, 'actor_loss': -2.306220604419708, 'dc_loss': 0.02527033515274525, 'time_step': 0.0392253782749176, 'td_error': 2.471177873543528, 'value_scale': 6.0142723475313575, 'discounted_advantage': -6.945895027736069, 'initial_state': 4.00681209564209, 'diff_eval': 4059.8493844441223} step=9000
2025-12-06 19:30.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_9000.d3


Epoch 10/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.30it/s, critic_loss=6.79, actor_loss=-2.32, dc_loss=0.0288]


2025-12-06 19:30.52 [info     ] PRDC_20251206192337: epoch=10 step=10000 epoch=10 metrics={'time_sample_batch': 0.004914445400238037, 'time_algorithm_update': 0.03386651635169983, 'critic_loss': 6.78573881816864, 'actor_loss': -2.319902404308319, 'dc_loss': 0.02878287062048912, 'time_step': 0.03902903723716736, 'td_error': 2.4607704605392104, 'value_scale': 6.332333692694544, 'discounted_advantage': -6.916142390854814, 'initial_state': 4.472236156463623, 'diff_eval': 3702.2254475328655} step=10000
2025-12-06 19:30.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_10000.d3


Epoch 11/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.07it/s, critic_loss=7.46, actor_loss=-2.34, dc_loss=0.0317]


2025-12-06 19:31.36 [info     ] PRDC_20251206192337: epoch=11 step=11000 epoch=11 metrics={'time_sample_batch': 0.0050655012130737306, 'time_algorithm_update': 0.03404964780807495, 'critic_loss': 7.465629647731781, 'actor_loss': -2.3356242480278016, 'dc_loss': 0.03171755483746529, 'time_step': 0.03936707067489624, 'td_error': 2.688656670929, 'value_scale': 7.582662168887115, 'discounted_advantage': -8.423713479434664, 'initial_state': 6.2846527099609375, 'diff_eval': 4615.466939569833} step=11000
2025-12-06 19:31.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_11000.d3


Epoch 12/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.07it/s, critic_loss=8.57, actor_loss=-2.35, dc_loss=0.032]


2025-12-06 19:32.19 [info     ] PRDC_20251206192337: epoch=12 step=12000 epoch=12 metrics={'time_sample_batch': 0.005026891946792602, 'time_algorithm_update': 0.03407082176208496, 'critic_loss': 8.573998511314391, 'actor_loss': -2.3461273493766783, 'dc_loss': 0.03208692431077361, 'time_step': 0.039359164476394655, 'td_error': 2.871653669023057, 'value_scale': 7.508268569539124, 'discounted_advantage': -8.00572699841778, 'initial_state': 5.855370998382568, 'diff_eval': 5127.063521414876} step=12000
2025-12-06 19:32.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_12000.d3


Epoch 13/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.09it/s, critic_loss=9.54, actor_loss=-2.35, dc_loss=0.0318]


2025-12-06 19:33.02 [info     ] PRDC_20251206192337: epoch=13 step=13000 epoch=13 metrics={'time_sample_batch': 0.004998029708862305, 'time_algorithm_update': 0.03402759432792664, 'critic_loss': 9.541593309879303, 'actor_loss': -2.3456667985916138, 'dc_loss': 0.031732779700309036, 'time_step': 0.039279792308807376, 'td_error': 3.0696162019193163, 'value_scale': 7.6303922347099915, 'discounted_advantage': -7.993848804467103, 'initial_state': 5.646743297576904, 'diff_eval': 4206.139293494663} step=13000
2025-12-06 19:33.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_13000.d3


Epoch 14/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.15it/s, critic_loss=10.6, actor_loss=-2.35, dc_loss=0.0297]


2025-12-06 19:33.46 [info     ] PRDC_20251206192337: epoch=14 step=14000 epoch=14 metrics={'time_sample_batch': 0.005008453369140625, 'time_algorithm_update': 0.033969784259796146, 'critic_loss': 10.61006828737259, 'actor_loss': -2.3458220853805543, 'dc_loss': 0.029729601856321095, 'time_step': 0.03923151350021362, 'td_error': 3.076621814990642, 'value_scale': 8.200060886357445, 'discounted_advantage': -8.022191207635014, 'initial_state': 6.392654895782471, 'diff_eval': 4497.646183807586} step=14000
2025-12-06 19:33.46 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_14000.d3


Epoch 15/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.88it/s, critic_loss=10.9, actor_loss=-2.34, dc_loss=0.0309]


2025-12-06 19:34.29 [info     ] PRDC_20251206192337: epoch=15 step=15000 epoch=15 metrics={'time_sample_batch': 0.0051700685024261474, 'time_algorithm_update': 0.034194751739501955, 'critic_loss': 10.93002849960327, 'actor_loss': -2.3433070573806765, 'dc_loss': 0.031018148694187404, 'time_step': 0.03963284754753113, 'td_error': 3.619935098071646, 'value_scale': 8.61598321441436, 'discounted_advantage': -10.496531338161576, 'initial_state': 5.761929988861084, 'diff_eval': 6516.115609717823} step=15000
2025-12-06 19:34.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_15000.d3


Epoch 16/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.82it/s, critic_loss=11.1, actor_loss=-2.35, dc_loss=0.031]


2025-12-06 19:35.15 [info     ] PRDC_20251206192337: epoch=16 step=16000 epoch=16 metrics={'time_sample_batch': 0.00603940224647522, 'time_algorithm_update': 0.03505444884300232, 'critic_loss': 11.130080677986145, 'actor_loss': -2.3469171566963194, 'dc_loss': 0.031032928232103588, 'time_step': 0.04137093949317932, 'td_error': 3.032909739891417, 'value_scale': 8.753894789035105, 'discounted_advantage': -10.014491971079831, 'initial_state': 7.23322868347168, 'diff_eval': 4925.77479276568} step=16000
2025-12-06 19:35.15 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_16000.d3


Epoch 17/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.70it/s, critic_loss=11.7, actor_loss=-2.35, dc_loss=0.0331]


2025-12-06 19:35.59 [info     ] PRDC_20251206192337: epoch=17 step=17000 epoch=17 metrics={'time_sample_batch': 0.0057262425422668455, 'time_algorithm_update': 0.03397300219535827, 'critic_loss': 11.708507875919342, 'actor_loss': -2.3469115614891054, 'dc_loss': 0.03314013173431158, 'time_step': 0.03995614314079285, 'td_error': 2.798069234011361, 'value_scale': 8.540353923556703, 'discounted_advantage': -11.10926503103578, 'initial_state': 7.286439895629883, 'diff_eval': 7226.34687712159} step=17000
2025-12-06 19:35.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_17000.d3


Epoch 18/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.28it/s, critic_loss=9.8, actor_loss=-2.34, dc_loss=0.048] 


2025-12-06 19:36.43 [info     ] PRDC_20251206192337: epoch=18 step=18000 epoch=18 metrics={'time_sample_batch': 0.005070376396179199, 'time_algorithm_update': 0.035325669050216675, 'critic_loss': 9.771689722537994, 'actor_loss': -2.3392956404685976, 'dc_loss': 0.04811757714301348, 'time_step': 0.04064463973045349, 'td_error': 2.946657802472726, 'value_scale': 9.18478891532634, 'discounted_advantage': -9.53943777009115, 'initial_state': 8.289397239685059, 'diff_eval': 19065.616929963857} step=18000
2025-12-06 19:36.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_18000.d3


Epoch 19/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.32it/s, critic_loss=9.27, actor_loss=-2.34, dc_loss=0.0519]


2025-12-06 19:37.28 [info     ] PRDC_20251206192337: epoch=19 step=19000 epoch=19 metrics={'time_sample_batch': 0.005055783033370972, 'time_algorithm_update': 0.035262999773025515, 'critic_loss': 9.282549892425537, 'actor_loss': -2.344195915699005, 'dc_loss': 0.05191058199480176, 'time_step': 0.04058325624465942, 'td_error': 3.695664837778426, 'value_scale': 10.280642006068497, 'discounted_advantage': -13.521534866001327, 'initial_state': 11.40942096710205, 'diff_eval': 14586.758435758791} step=19000
2025-12-06 19:37.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_19000.d3


Epoch 20/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.63it/s, critic_loss=12.5, actor_loss=-2.34, dc_loss=0.0721]


2025-12-06 19:38.12 [info     ] PRDC_20251206192337: epoch=20 step=20000 epoch=20 metrics={'time_sample_batch': 0.0051012587547302245, 'time_algorithm_update': 0.03469293570518494, 'critic_loss': 12.486399532794952, 'actor_loss': -2.3438905992507935, 'dc_loss': 0.07214374221861362, 'time_step': 0.040054779291152955, 'td_error': 2.6258294387422403, 'value_scale': 9.460204237929958, 'discounted_advantage': -11.214719642756895, 'initial_state': 12.36606216430664, 'diff_eval': 13823.058922042326} step=20000
2025-12-06 19:38.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_20000.d3


Epoch 21/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.87it/s, critic_loss=10.3, actor_loss=-2.34, dc_loss=0.071]


2025-12-06 19:38.55 [info     ] PRDC_20251206192337: epoch=21 step=21000 epoch=21 metrics={'time_sample_batch': 0.005030872821807861, 'time_algorithm_update': 0.0343680419921875, 'critic_loss': 10.260248511791229, 'actor_loss': -2.3437994327545164, 'dc_loss': 0.07083998738974333, 'time_step': 0.03966036462783813, 'td_error': 2.6535430937028908, 'value_scale': 9.599456982915067, 'discounted_advantage': -11.561674278848525, 'initial_state': 9.656190872192383, 'diff_eval': 12650.624452543198} step=21000
2025-12-06 19:38.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_21000.d3


Epoch 22/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.05it/s, critic_loss=9.27, actor_loss=-2.35, dc_loss=0.0591]


2025-12-06 19:39.39 [info     ] PRDC_20251206192337: epoch=22 step=22000 epoch=22 metrics={'time_sample_batch': 0.005056065082550049, 'time_algorithm_update': 0.034097292184829715, 'critic_loss': 9.272528084516525, 'actor_loss': -2.352282371520996, 'dc_loss': 0.05917398166656494, 'time_step': 0.03941044163703918, 'td_error': 3.5888641375534798, 'value_scale': 10.940024563007537, 'discounted_advantage': -14.313844747481623, 'initial_state': 11.868236541748047, 'diff_eval': 19652.524252606014} step=22000
2025-12-06 19:39.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_22000.d3


Epoch 23/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.27it/s, critic_loss=10.4, actor_loss=-2.33, dc_loss=0.0808]


2025-12-06 19:40.22 [info     ] PRDC_20251206192337: epoch=23 step=23000 epoch=23 metrics={'time_sample_batch': 0.005007349729537964, 'time_algorithm_update': 0.03379919576644898, 'critic_loss': 10.379509983778, 'actor_loss': -2.3306999335289, 'dc_loss': 0.080867226049304, 'time_step': 0.03906342959403992, 'td_error': 3.9287383673271403, 'value_scale': 11.945938560366553, 'discounted_advantage': -14.268932128606812, 'initial_state': 12.343561172485352, 'diff_eval': 29766.931781487627} step=23000
2025-12-06 19:40.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_23000.d3


Epoch 24/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.21it/s, critic_loss=12.1, actor_loss=-2.35, dc_loss=0.0683]


2025-12-06 19:41.05 [info     ] PRDC_20251206192337: epoch=24 step=24000 epoch=24 metrics={'time_sample_batch': 0.005015602111816406, 'time_algorithm_update': 0.03389774966239929, 'critic_loss': 12.11072482061386, 'actor_loss': -2.3462418112754824, 'dc_loss': 0.0681824510768056, 'time_step': 0.039166281700134274, 'td_error': 3.706587863049256, 'value_scale': 12.051625224650031, 'discounted_advantage': -15.45576252708933, 'initial_state': 13.152435302734375, 'diff_eval': 15141.191707402726} step=24000
2025-12-06 19:41.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_24000.d3


Epoch 25/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.10it/s, critic_loss=11.2, actor_loss=-2.34, dc_loss=0.0737]


2025-12-06 19:41.48 [info     ] PRDC_20251206192337: epoch=25 step=25000 epoch=25 metrics={'time_sample_batch': 0.0051291511058807375, 'time_algorithm_update': 0.03395590353012085, 'critic_loss': 11.223215482711792, 'actor_loss': -2.338111563205719, 'dc_loss': 0.07385489968210458, 'time_step': 0.0393337562084198, 'td_error': 4.967539961635879, 'value_scale': 12.797555854214608, 'discounted_advantage': -17.77755116659054, 'initial_state': 13.694226264953613, 'diff_eval': 25015.564687118156} step=25000
2025-12-06 19:41.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_25000.d3


Epoch 26/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.17it/s, critic_loss=12.2, actor_loss=-2.34, dc_loss=0.0688]


2025-12-06 19:42.31 [info     ] PRDC_20251206192337: epoch=26 step=26000 epoch=26 metrics={'time_sample_batch': 0.0050149202346801754, 'time_algorithm_update': 0.03395517635345459, 'critic_loss': 12.229764098644257, 'actor_loss': -2.344539635181427, 'dc_loss': 0.06866216865181923, 'time_step': 0.039215943336486814, 'td_error': 4.300257583869196, 'value_scale': 12.686395021431828, 'discounted_advantage': -17.357338981344796, 'initial_state': 12.522339820861816, 'diff_eval': 13936.39770990304} step=26000
2025-12-06 19:42.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_26000.d3


Epoch 27/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.20it/s, critic_loss=12.6, actor_loss=-2.35, dc_loss=0.0571]


2025-12-06 19:43.14 [info     ] PRDC_20251206192337: epoch=27 step=27000 epoch=27 metrics={'time_sample_batch': 0.005029397249221802, 'time_algorithm_update': 0.03385842251777649, 'critic_loss': 12.554496519565582, 'actor_loss': -2.3498800702095033, 'dc_loss': 0.05727289763092995, 'time_step': 0.03914456701278687, 'td_error': 5.0623590015466835, 'value_scale': 14.171900048799662, 'discounted_advantage': -16.750071055172125, 'initial_state': 14.08883285522461, 'diff_eval': 19071.295750039753} step=27000
2025-12-06 19:43.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_27000.d3


Epoch 28/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.59it/s, critic_loss=13.4, actor_loss=-2.34, dc_loss=0.0803]


2025-12-06 19:43.59 [info     ] PRDC_20251206192337: epoch=28 step=28000 epoch=28 metrics={'time_sample_batch': 0.005208441257476807, 'time_algorithm_update': 0.03462163138389587, 'critic_loss': 13.357925337791443, 'actor_loss': -2.340299572944641, 'dc_loss': 0.08003483411669732, 'time_step': 0.04009619832038879, 'td_error': 4.957608319781257, 'value_scale': 14.905101734008218, 'discounted_advantage': -17.044076965631717, 'initial_state': 14.578503608703613, 'diff_eval': 13102.780896337896} step=28000
2025-12-06 19:43.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_28000.d3


Epoch 29/200: 100%|██████████| 1000/1000 [00:40<00:00, 25.00it/s, critic_loss=16.1, actor_loss=-2.36, dc_loss=0.058]


2025-12-06 19:44.42 [info     ] PRDC_20251206192337: epoch=29 step=29000 epoch=29 metrics={'time_sample_batch': 0.005061838150024414, 'time_algorithm_update': 0.034150391817092896, 'critic_loss': 16.100117718219757, 'actor_loss': -2.3627168765068056, 'dc_loss': 0.0579722750633955, 'time_step': 0.0394714777469635, 'td_error': 5.558345171449857, 'value_scale': 15.429683574229832, 'discounted_advantage': -17.60109050950422, 'initial_state': 14.928796768188477, 'diff_eval': 14568.279575309149} step=29000
2025-12-06 19:44.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_29000.d3


Epoch 30/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.33it/s, critic_loss=16.2, actor_loss=-2.36, dc_loss=0.0574]


2025-12-06 19:45.25 [info     ] PRDC_20251206192337: epoch=30 step=30000 epoch=30 metrics={'time_sample_batch': 0.005021656513214111, 'time_algorithm_update': 0.03367823672294617, 'critic_loss': 16.186297112464906, 'actor_loss': -2.364719980239868, 'dc_loss': 0.057355569649487737, 'time_step': 0.03896042585372925, 'td_error': 6.904548636870358, 'value_scale': 17.38581823782132, 'discounted_advantage': -22.167719620644718, 'initial_state': 16.291038513183594, 'diff_eval': 15790.485829544232} step=30000
2025-12-06 19:45.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_30000.d3


Epoch 31/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.61it/s, critic_loss=18.7, actor_loss=-2.36, dc_loss=0.0695]


2025-12-06 19:46.09 [info     ] PRDC_20251206192337: epoch=31 step=31000 epoch=31 metrics={'time_sample_batch': 0.0050785446166992185, 'time_algorithm_update': 0.034711861371994016, 'critic_loss': 18.71372163391113, 'actor_loss': -2.3603380274772645, 'dc_loss': 0.06944196790456772, 'time_step': 0.04005695652961731, 'td_error': 7.463979834473088, 'value_scale': 18.281993202104765, 'discounted_advantage': -24.40217649717697, 'initial_state': 17.042509078979492, 'diff_eval': 17035.63640369121} step=31000
2025-12-06 19:46.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_31000.d3


Epoch 32/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.67it/s, critic_loss=20.3, actor_loss=-2.37, dc_loss=0.0546]


2025-12-06 19:46.53 [info     ] PRDC_20251206192337: epoch=32 step=32000 epoch=32 metrics={'time_sample_batch': 0.005546420574188232, 'time_algorithm_update': 0.03416020035743714, 'critic_loss': 20.23344662618637, 'actor_loss': -2.3746270537376404, 'dc_loss': 0.05460876268148422, 'time_step': 0.03997120237350464, 'td_error': 7.281065133560726, 'value_scale': 18.57186169445215, 'discounted_advantage': -22.450762992590146, 'initial_state': 17.888687133789062, 'diff_eval': 14907.804685159734} step=32000
2025-12-06 19:46.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_32000.d3


Epoch 33/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.95it/s, critic_loss=19.7, actor_loss=-2.37, dc_loss=0.0691]


2025-12-06 19:47.36 [info     ] PRDC_20251206192337: epoch=33 step=33000 epoch=33 metrics={'time_sample_batch': 0.005003770351409912, 'time_algorithm_update': 0.03428514766693115, 'critic_loss': 19.636441910743713, 'actor_loss': -2.367159266471863, 'dc_loss': 0.06906169033050537, 'time_step': 0.03954237198829651, 'td_error': 7.932310018749946, 'value_scale': 19.92416548148135, 'discounted_advantage': -24.417279634695163, 'initial_state': 19.117324829101562, 'diff_eval': 14959.19484201167} step=33000
2025-12-06 19:47.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_33000.d3


Epoch 34/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.02it/s, critic_loss=19.9, actor_loss=-2.38, dc_loss=0.0519]


2025-12-06 19:48.20 [info     ] PRDC_20251206192337: epoch=34 step=34000 epoch=34 metrics={'time_sample_batch': 0.0050266354084014895, 'time_algorithm_update': 0.03416841459274292, 'critic_loss': 19.93685156059265, 'actor_loss': -2.3840449137687685, 'dc_loss': 0.05187756563350558, 'time_step': 0.03945839166641235, 'td_error': 8.059484477799069, 'value_scale': 20.950586993981087, 'discounted_advantage': -26.23143611777975, 'initial_state': 20.5378360748291, 'diff_eval': 13936.081085457521} step=34000
2025-12-06 19:48.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_34000.d3


Epoch 35/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.88it/s, critic_loss=21.2, actor_loss=-2.38, dc_loss=0.0563]


2025-12-06 19:49.03 [info     ] PRDC_20251206192337: epoch=35 step=35000 epoch=35 metrics={'time_sample_batch': 0.0048873391151428225, 'time_algorithm_update': 0.03448988318443298, 'critic_loss': 21.199279742240904, 'actor_loss': -2.3832438983917235, 'dc_loss': 0.056527027901262045, 'time_step': 0.039650057792663576, 'td_error': 8.711364317786352, 'value_scale': 21.565369814599975, 'discounted_advantage': -24.9355000566405, 'initial_state': 22.755521774291992, 'diff_eval': 19229.08493082556} step=35000
2025-12-06 19:49.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_35000.d3


Epoch 36/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.97it/s, critic_loss=21.7, actor_loss=-2.38, dc_loss=0.0643]


2025-12-06 19:49.49 [info     ] PRDC_20251206192337: epoch=36 step=36000 epoch=36 metrics={'time_sample_batch': 0.005470573425292969, 'time_algorithm_update': 0.035343908071517945, 'critic_loss': 21.70726149368286, 'actor_loss': -2.3797204875946045, 'dc_loss': 0.06413955622166395, 'time_step': 0.041085593223571776, 'td_error': 8.472422808852214, 'value_scale': 22.797502924790425, 'discounted_advantage': -27.021750335627956, 'initial_state': 23.087818145751953, 'diff_eval': 15980.911206346202} step=36000
2025-12-06 19:49.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_36000.d3


Epoch 37/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.03it/s, critic_loss=22, actor_loss=-2.39, dc_loss=0.0493] 


2025-12-06 19:50.32 [info     ] PRDC_20251206192337: epoch=37 step=37000 epoch=37 metrics={'time_sample_batch': 0.005035569906234741, 'time_algorithm_update': 0.03412574291229248, 'critic_loss': 22.029578665733336, 'actor_loss': -2.3900594244003295, 'dc_loss': 0.049493233863264326, 'time_step': 0.039420470952987674, 'td_error': 8.969222457440297, 'value_scale': 23.257590539039917, 'discounted_advantage': -27.94348379287087, 'initial_state': 24.991561889648438, 'diff_eval': 18744.313533614342} step=37000
2025-12-06 19:50.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_37000.d3


Epoch 38/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.23it/s, critic_loss=21.7, actor_loss=-2.38, dc_loss=0.0598]


2025-12-06 19:51.15 [info     ] PRDC_20251206192337: epoch=38 step=38000 epoch=38 metrics={'time_sample_batch': 0.004984135389328003, 'time_algorithm_update': 0.03389118647575379, 'critic_loss': 21.73609647846222, 'actor_loss': -2.383701710224152, 'dc_loss': 0.059752512283623216, 'time_step': 0.039127347230911255, 'td_error': 8.907672334064944, 'value_scale': 23.872165947039328, 'discounted_advantage': -30.010162778429148, 'initial_state': 22.895212173461914, 'diff_eval': 16817.44876444401} step=38000
2025-12-06 19:51.15 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_38000.d3


Epoch 39/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.79it/s, critic_loss=22, actor_loss=-2.39, dc_loss=0.0525] 


2025-12-06 19:51.59 [info     ] PRDC_20251206192337: epoch=39 step=39000 epoch=39 metrics={'time_sample_batch': 0.005109536409378052, 'time_algorithm_update': 0.034417885065078736, 'critic_loss': 21.93185110425949, 'actor_loss': -2.39159055185318, 'dc_loss': 0.05251742257922888, 'time_step': 0.03978877401351929, 'td_error': 8.694333071928693, 'value_scale': 24.569777613255912, 'discounted_advantage': -29.598712502400055, 'initial_state': 24.088035583496094, 'diff_eval': 16394.81720820491} step=39000
2025-12-06 19:51.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_39000.d3


Epoch 40/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.15it/s, critic_loss=22.1, actor_loss=-2.39, dc_loss=0.0527]


2025-12-06 19:52.42 [info     ] PRDC_20251206192337: epoch=40 step=40000 epoch=40 metrics={'time_sample_batch': 0.005035077571868897, 'time_algorithm_update': 0.033918826103210446, 'critic_loss': 22.098077134132385, 'actor_loss': -2.3922599363327026, 'dc_loss': 0.052611556462943555, 'time_step': 0.03921612977981567, 'td_error': 8.541118165381306, 'value_scale': 24.616235576370187, 'discounted_advantage': -28.380686345680868, 'initial_state': 22.16787338256836, 'diff_eval': 14348.917749767756} step=40000
2025-12-06 19:52.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_40000.d3


Epoch 41/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.26it/s, critic_loss=22.7, actor_loss=-2.39, dc_loss=0.0537]


2025-12-06 19:53.25 [info     ] PRDC_20251206192337: epoch=41 step=41000 epoch=41 metrics={'time_sample_batch': 0.004944817066192627, 'time_algorithm_update': 0.03386407232284546, 'critic_loss': 22.69461203289032, 'actor_loss': -2.3922767639160156, 'dc_loss': 0.0537361608967185, 'time_step': 0.03907181024551391, 'td_error': 9.451515783125387, 'value_scale': 25.749010737128213, 'discounted_advantage': -32.52332448199853, 'initial_state': 25.10608673095703, 'diff_eval': 16818.758004999392} step=41000
2025-12-06 19:53.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_41000.d3


Epoch 42/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.22it/s, critic_loss=24, actor_loss=-2.39, dc_loss=0.0553] 


2025-12-06 19:54.08 [info     ] PRDC_20251206192337: epoch=42 step=42000 epoch=42 metrics={'time_sample_batch': 0.005002682447433472, 'time_algorithm_update': 0.03386408066749573, 'critic_loss': 23.956147594451906, 'actor_loss': -2.3939695954322815, 'dc_loss': 0.05541706404089928, 'time_step': 0.03911528253555298, 'td_error': 9.59469779023779, 'value_scale': 26.68974624634139, 'discounted_advantage': -30.392080201061404, 'initial_state': 27.105649948120117, 'diff_eval': 17869.56829321404} step=42000
2025-12-06 19:54.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_42000.d3


Epoch 43/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.22it/s, critic_loss=26.2, actor_loss=-2.4, dc_loss=0.0528]


2025-12-06 19:54.51 [info     ] PRDC_20251206192337: epoch=43 step=43000 epoch=43 metrics={'time_sample_batch': 0.004957382440567017, 'time_algorithm_update': 0.0338834319114685, 'critic_loss': 26.177486676216127, 'actor_loss': -2.3963017554283144, 'dc_loss': 0.05280445363372564, 'time_step': 0.039099068880081175, 'td_error': 10.00909559045595, 'value_scale': 27.411249510210464, 'discounted_advantage': -31.8442640208287, 'initial_state': 27.87787628173828, 'diff_eval': 18488.180141145873} step=43000
2025-12-06 19:54.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_43000.d3


Epoch 44/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.06it/s, critic_loss=25.7, actor_loss=-2.4, dc_loss=0.055] 


2025-12-06 19:55.35 [info     ] PRDC_20251206192337: epoch=44 step=44000 epoch=44 metrics={'time_sample_batch': 0.005014671564102173, 'time_algorithm_update': 0.03408724784851074, 'critic_loss': 25.662643688201904, 'actor_loss': -2.3983845610618593, 'dc_loss': 0.0550014823153615, 'time_step': 0.03935991263389588, 'td_error': 10.05172240427664, 'value_scale': 27.557873229454025, 'discounted_advantage': -32.4766630960664, 'initial_state': 28.12222671508789, 'diff_eval': 16435.472810223346} step=44000
2025-12-06 19:55.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_44000.d3


Epoch 45/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.58it/s, critic_loss=27.4, actor_loss=-2.4, dc_loss=0.0538]


2025-12-06 19:56.19 [info     ] PRDC_20251206192337: epoch=45 step=45000 epoch=45 metrics={'time_sample_batch': 0.00500355863571167, 'time_algorithm_update': 0.03487940001487732, 'critic_loss': 27.371580067634582, 'actor_loss': -2.3997623453140258, 'dc_loss': 0.05375254937261343, 'time_step': 0.04014259791374206, 'td_error': 9.739611199694576, 'value_scale': 27.313681658048242, 'discounted_advantage': -32.67340392884832, 'initial_state': 26.81340980529785, 'diff_eval': 16867.223885493564} step=45000
2025-12-06 19:56.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_45000.d3


Epoch 46/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.88it/s, critic_loss=28.8, actor_loss=-2.4, dc_loss=0.0529]


2025-12-06 19:57.02 [info     ] PRDC_20251206192337: epoch=46 step=46000 epoch=46 metrics={'time_sample_batch': 0.004995741844177246, 'time_algorithm_update': 0.03439531970024109, 'critic_loss': 28.845681136131287, 'actor_loss': -2.403100179195404, 'dc_loss': 0.053014705866575244, 'time_step': 0.03965500593185425, 'td_error': 10.216898895653273, 'value_scale': 28.01300294823751, 'discounted_advantage': -32.828269920536066, 'initial_state': 27.669965744018555, 'diff_eval': 20398.29999783499} step=46000
2025-12-06 19:57.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_46000.d3


Epoch 47/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.85it/s, critic_loss=29.7, actor_loss=-2.4, dc_loss=0.0534]


2025-12-06 19:57.46 [info     ] PRDC_20251206192337: epoch=47 step=47000 epoch=47 metrics={'time_sample_batch': 0.005073283433914185, 'time_algorithm_update': 0.03435475182533264, 'critic_loss': 29.720804385185243, 'actor_loss': -2.4028996076583864, 'dc_loss': 0.05343606641888619, 'time_step': 0.03968472909927368, 'td_error': 10.488963352401896, 'value_scale': 27.992858035688204, 'discounted_advantage': -34.04861195817698, 'initial_state': 27.234994888305664, 'diff_eval': 19110.27169126862} step=47000
2025-12-06 19:57.46 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_47000.d3


Epoch 48/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.17it/s, critic_loss=32.2, actor_loss=-2.4, dc_loss=0.0547]


2025-12-06 19:58.29 [info     ] PRDC_20251206192337: epoch=48 step=48000 epoch=48 metrics={'time_sample_batch': 0.005027216672897339, 'time_algorithm_update': 0.033949198961257934, 'critic_loss': 32.252485039711, 'actor_loss': -2.402756045818329, 'dc_loss': 0.05462154880911112, 'time_step': 0.039220056056976316, 'td_error': 9.79579479061983, 'value_scale': 28.37451604319844, 'discounted_advantage': -31.258696072988446, 'initial_state': 28.65581512451172, 'diff_eval': 18517.823373187366} step=48000
2025-12-06 19:58.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_48000.d3


Epoch 49/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.05it/s, critic_loss=33.5, actor_loss=-2.4, dc_loss=0.0549]


2025-12-06 19:59.14 [info     ] PRDC_20251206192337: epoch=49 step=49000 epoch=49 metrics={'time_sample_batch': 0.0050181267261505125, 'time_algorithm_update': 0.03575660943984985, 'critic_loss': 33.54313938140869, 'actor_loss': -2.40458455657959, 'dc_loss': 0.05488678636401892, 'time_step': 0.041048718929290774, 'td_error': 9.446256327155064, 'value_scale': 29.03907931709704, 'discounted_advantage': -34.05787139859875, 'initial_state': 28.455106735229492, 'diff_eval': 21320.915128511442} step=49000
2025-12-06 19:59.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_49000.d3


Epoch 50/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.85it/s, critic_loss=33.6, actor_loss=-2.41, dc_loss=0.053]


2025-12-06 19:59.58 [info     ] PRDC_20251206192337: epoch=50 step=50000 epoch=50 metrics={'time_sample_batch': 0.005311491250991821, 'time_algorithm_update': 0.03411768221855164, 'critic_loss': 33.59776008796692, 'actor_loss': -2.405430143356323, 'dc_loss': 0.05306954676657915, 'time_step': 0.0396880202293396, 'td_error': 10.361756282988598, 'value_scale': 28.561909972656476, 'discounted_advantage': -35.31300376783593, 'initial_state': 26.499313354492188, 'diff_eval': 19327.291127024626} step=50000
2025-12-06 19:59.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_50000.d3


Epoch 51/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.91it/s, critic_loss=34.1, actor_loss=-2.41, dc_loss=0.0543]


2025-12-06 20:00.42 [info     ] PRDC_20251206192337: epoch=51 step=51000 epoch=51 metrics={'time_sample_batch': 0.005133166551589966, 'time_algorithm_update': 0.0341875274181366, 'critic_loss': 34.0242217130661, 'actor_loss': -2.4072211484909056, 'dc_loss': 0.0542623607814312, 'time_step': 0.03959599423408508, 'td_error': 9.884718905289818, 'value_scale': 28.633785566931774, 'discounted_advantage': -31.171207000087012, 'initial_state': 26.404220581054688, 'diff_eval': 16548.894772490738} step=51000
2025-12-06 20:00.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_51000.d3


Epoch 52/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.97it/s, critic_loss=35.8, actor_loss=-2.41, dc_loss=0.0559]


2025-12-06 20:01.25 [info     ] PRDC_20251206192337: epoch=52 step=52000 epoch=52 metrics={'time_sample_batch': 0.005228920698165894, 'time_algorithm_update': 0.034002546310424805, 'critic_loss': 35.88494099521637, 'actor_loss': -2.407447636127472, 'dc_loss': 0.055888785637915134, 'time_step': 0.03950195240974426, 'td_error': 10.533081627373738, 'value_scale': 28.872054802495846, 'discounted_advantage': -34.62359413704812, 'initial_state': 26.673999786376953, 'diff_eval': 22089.675740344082} step=52000
2025-12-06 20:01.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_52000.d3


Epoch 53/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.97it/s, critic_loss=38.3, actor_loss=-2.41, dc_loss=0.0531]


2025-12-06 20:02.09 [info     ] PRDC_20251206192337: epoch=53 step=53000 epoch=53 metrics={'time_sample_batch': 0.005129382133483886, 'time_algorithm_update': 0.03409162211418152, 'critic_loss': 38.27719547653198, 'actor_loss': -2.410528829574585, 'dc_loss': 0.05313816187530756, 'time_step': 0.0394995744228363, 'td_error': 11.4680171828814, 'value_scale': 29.467467259456406, 'discounted_advantage': -37.518349630612505, 'initial_state': 26.11798858642578, 'diff_eval': 20226.883049307467} step=53000
2025-12-06 20:02.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_53000.d3


Epoch 54/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.21it/s, critic_loss=39.5, actor_loss=-2.41, dc_loss=0.0529]


2025-12-06 20:02.52 [info     ] PRDC_20251206192337: epoch=54 step=54000 epoch=54 metrics={'time_sample_batch': 0.005049753427505493, 'time_algorithm_update': 0.0338409366607666, 'critic_loss': 39.503301538467404, 'actor_loss': -2.412076177597046, 'dc_loss': 0.052785404730588195, 'time_step': 0.039151143550872805, 'td_error': 11.547205657753532, 'value_scale': 28.865867116928026, 'discounted_advantage': -35.62812635283846, 'initial_state': 24.962081909179688, 'diff_eval': 18517.152527108046} step=54000
2025-12-06 20:02.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_54000.d3


Epoch 55/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.28it/s, critic_loss=40.9, actor_loss=-2.41, dc_loss=0.0533]


2025-12-06 20:03.35 [info     ] PRDC_20251206192337: epoch=55 step=55000 epoch=55 metrics={'time_sample_batch': 0.004983707189559937, 'time_algorithm_update': 0.033783472776412964, 'critic_loss': 40.947538455963134, 'actor_loss': -2.413316430568695, 'dc_loss': 0.053227436743676665, 'time_step': 0.03902334594726563, 'td_error': 11.245480958561815, 'value_scale': 30.232371998810322, 'discounted_advantage': -36.74239273415691, 'initial_state': 30.107973098754883, 'diff_eval': 19208.423622111106} step=55000
2025-12-06 20:03.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_55000.d3


Epoch 56/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.20it/s, critic_loss=41.9, actor_loss=-2.41, dc_loss=0.0536]


2025-12-06 20:04.18 [info     ] PRDC_20251206192337: epoch=56 step=56000 epoch=56 metrics={'time_sample_batch': 0.005073712587356567, 'time_algorithm_update': 0.033831976175308226, 'critic_loss': 41.89894402980804, 'actor_loss': -2.415040493965149, 'dc_loss': 0.05359199021756649, 'time_step': 0.03916202807426453, 'td_error': 11.228779653242865, 'value_scale': 30.500452746561443, 'discounted_advantage': -37.2864445152047, 'initial_state': 27.924901962280273, 'diff_eval': 17347.597832588173} step=56000
2025-12-06 20:04.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_56000.d3


Epoch 57/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.32it/s, critic_loss=44.7, actor_loss=-2.42, dc_loss=0.0532]


2025-12-06 20:05.01 [info     ] PRDC_20251206192337: epoch=57 step=57000 epoch=57 metrics={'time_sample_batch': 0.005005831718444824, 'time_algorithm_update': 0.03371480417251587, 'critic_loss': 44.64775903320312, 'actor_loss': -2.4151513032913208, 'dc_loss': 0.053214246228337286, 'time_step': 0.038977190256118774, 'td_error': 12.088697976580372, 'value_scale': 30.930994697346073, 'discounted_advantage': -40.59418632367679, 'initial_state': 27.659561157226562, 'diff_eval': 21657.030459062596} step=57000
2025-12-06 20:05.01 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_57000.d3


Epoch 58/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.34it/s, critic_loss=43.2, actor_loss=-2.42, dc_loss=0.0521]


2025-12-06 20:05.44 [info     ] PRDC_20251206192337: epoch=58 step=58000 epoch=58 metrics={'time_sample_batch': 0.0049417376518249515, 'time_algorithm_update': 0.03374430513381958, 'critic_loss': 43.248831922531124, 'actor_loss': -2.4184218158721924, 'dc_loss': 0.05204104113951325, 'time_step': 0.03894811177253723, 'td_error': 11.22038425408508, 'value_scale': 31.070930162699206, 'discounted_advantage': -35.81583013380191, 'initial_state': 28.198888778686523, 'diff_eval': 16676.301113333906} step=58000
2025-12-06 20:05.44 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_58000.d3


Epoch 59/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.89it/s, critic_loss=47.6, actor_loss=-2.42, dc_loss=0.0543]


2025-12-06 20:06.27 [info     ] PRDC_20251206192337: epoch=59 step=59000 epoch=59 metrics={'time_sample_batch': 0.0052230081558227535, 'time_algorithm_update': 0.034145641565322876, 'critic_loss': 47.498031675338744, 'actor_loss': -2.4170081510543824, 'dc_loss': 0.054333795376122, 'time_step': 0.039647753715515134, 'td_error': 12.476846152758705, 'value_scale': 32.63151899892103, 'discounted_advantage': -41.267382767321216, 'initial_state': 29.349184036254883, 'diff_eval': 19232.666757220468} step=59000
2025-12-06 20:06.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_59000.d3


Epoch 60/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.98it/s, critic_loss=51, actor_loss=-2.42, dc_loss=0.0528] 


2025-12-06 20:07.13 [info     ] PRDC_20251206192337: epoch=60 step=60000 epoch=60 metrics={'time_sample_batch': 0.005052346467971802, 'time_algorithm_update': 0.03585599112510681, 'critic_loss': 51.05498837089539, 'actor_loss': -2.4182367300987244, 'dc_loss': 0.0527941100075841, 'time_step': 0.04116816997528076, 'td_error': 12.04109145893789, 'value_scale': 32.197224046126294, 'discounted_advantage': -36.90078949573714, 'initial_state': 26.97690200805664, 'diff_eval': 15519.825531928842} step=60000
2025-12-06 20:07.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_60000.d3


Epoch 61/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.06it/s, critic_loss=53, actor_loss=-2.42, dc_loss=0.0513] 


2025-12-06 20:07.58 [info     ] PRDC_20251206192337: epoch=61 step=61000 epoch=61 metrics={'time_sample_batch': 0.00519608736038208, 'time_algorithm_update': 0.035525559186935424, 'critic_loss': 53.00376001358032, 'actor_loss': -2.4205909729003907, 'dc_loss': 0.051279746010899546, 'time_step': 0.040986057519912716, 'td_error': 11.89055993428864, 'value_scale': 33.1088557990716, 'discounted_advantage': -37.834363494021424, 'initial_state': 28.073740005493164, 'diff_eval': 14481.4897709401} step=61000
2025-12-06 20:07.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_61000.d3


Epoch 62/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.15it/s, critic_loss=56.1, actor_loss=-2.42, dc_loss=0.0509]


2025-12-06 20:08.41 [info     ] PRDC_20251206192337: epoch=62 step=62000 epoch=62 metrics={'time_sample_batch': 0.004945150852203369, 'time_algorithm_update': 0.034053610801696776, 'critic_loss': 56.07359817886353, 'actor_loss': -2.419710961818695, 'dc_loss': 0.05093243999779224, 'time_step': 0.03924511432647705, 'td_error': 12.716531870470932, 'value_scale': 34.97042590153949, 'discounted_advantage': -43.71689562095619, 'initial_state': 30.324932098388672, 'diff_eval': 18052.2122981954} step=62000
2025-12-06 20:08.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_62000.d3


Epoch 63/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.15it/s, critic_loss=61.9, actor_loss=-2.42, dc_loss=0.0502]


2025-12-06 20:09.25 [info     ] PRDC_20251206192337: epoch=63 step=63000 epoch=63 metrics={'time_sample_batch': 0.004950597047805786, 'time_algorithm_update': 0.033904398918151855, 'critic_loss': 61.99185757446289, 'actor_loss': -2.419658902645111, 'dc_loss': 0.05018703011423349, 'time_step': 0.039235971927642825, 'td_error': 14.718974901330665, 'value_scale': 36.24115766170189, 'discounted_advantage': -46.89480428741774, 'initial_state': 33.05671310424805, 'diff_eval': 17063.951512677977} step=63000
2025-12-06 20:09.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_63000.d3


Epoch 64/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.79it/s, critic_loss=67, actor_loss=-2.42, dc_loss=0.0517] 


2025-12-06 20:10.09 [info     ] PRDC_20251206192337: epoch=64 step=64000 epoch=64 metrics={'time_sample_batch': 0.00511696982383728, 'time_algorithm_update': 0.03439158010482788, 'critic_loss': 67.14931819343568, 'actor_loss': -2.418034056663513, 'dc_loss': 0.05174958967044949, 'time_step': 0.039780329942703245, 'td_error': 15.169330129180848, 'value_scale': 37.287408004215884, 'discounted_advantage': -47.062678643951735, 'initial_state': 32.62349319458008, 'diff_eval': 16328.040767141929} step=64000
2025-12-06 20:10.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_64000.d3


Epoch 65/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.15it/s, critic_loss=75.1, actor_loss=-2.42, dc_loss=0.0498]


2025-12-06 20:10.52 [info     ] PRDC_20251206192337: epoch=65 step=65000 epoch=65 metrics={'time_sample_batch': 0.004948644876480102, 'time_algorithm_update': 0.03405675959587097, 'critic_loss': 75.11116427993774, 'actor_loss': -2.42096980714798, 'dc_loss': 0.04977858252823353, 'time_step': 0.03924037194252014, 'td_error': 18.469354863685382, 'value_scale': 40.299664555687364, 'discounted_advantage': -46.95245358574236, 'initial_state': 35.694332122802734, 'diff_eval': 18329.278072516718} step=65000
2025-12-06 20:10.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_65000.d3


Epoch 66/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.15it/s, critic_loss=82.9, actor_loss=-2.42, dc_loss=0.0532]


2025-12-06 20:11.37 [info     ] PRDC_20251206192337: epoch=66 step=66000 epoch=66 metrics={'time_sample_batch': 0.005211802005767823, 'time_algorithm_update': 0.03528745412826538, 'critic_loss': 82.79324333572387, 'actor_loss': -2.418803590774536, 'dc_loss': 0.05322807874530554, 'time_step': 0.04077648520469666, 'td_error': 18.530758525865174, 'value_scale': 40.77949615750596, 'discounted_advantage': -55.27630510697436, 'initial_state': 36.92744064331055, 'diff_eval': 18122.288854966762} step=66000
2025-12-06 20:11.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_66000.d3


Epoch 67/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.81it/s, critic_loss=86.5, actor_loss=-2.42, dc_loss=0.0545]


2025-12-06 20:12.20 [info     ] PRDC_20251206192337: epoch=67 step=67000 epoch=67 metrics={'time_sample_batch': 0.005185895919799804, 'time_algorithm_update': 0.03430897831916809, 'critic_loss': 86.57874543762208, 'actor_loss': -2.416422508239746, 'dc_loss': 0.05443967439234257, 'time_step': 0.03974974489212036, 'td_error': 17.413403390599647, 'value_scale': 42.88081050747248, 'discounted_advantage': -54.757944856746, 'initial_state': 40.08955383300781, 'diff_eval': 17932.690058811713} step=67000
2025-12-06 20:12.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_67000.d3


Epoch 68/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.22it/s, critic_loss=95.6, actor_loss=-2.42, dc_loss=0.0501]


2025-12-06 20:13.04 [info     ] PRDC_20251206192337: epoch=68 step=68000 epoch=68 metrics={'time_sample_batch': 0.005050217151641846, 'time_algorithm_update': 0.033837751626968386, 'critic_loss': 95.53715964126587, 'actor_loss': -2.415671796321869, 'dc_loss': 0.05000185232236981, 'time_step': 0.03913891172409058, 'td_error': 17.31472706562526, 'value_scale': 43.8433562467329, 'discounted_advantage': -57.689309393075604, 'initial_state': 37.669517517089844, 'diff_eval': 15213.061780138745} step=68000
2025-12-06 20:13.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_68000.d3


Epoch 69/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.21it/s, critic_loss=113, actor_loss=-2.41, dc_loss=0.048]


2025-12-06 20:13.47 [info     ] PRDC_20251206192337: epoch=69 step=69000 epoch=69 metrics={'time_sample_batch': 0.005000328302383423, 'time_algorithm_update': 0.03388899803161621, 'critic_loss': 112.56488011550903, 'actor_loss': -2.4144585671424865, 'dc_loss': 0.047936068449169394, 'time_step': 0.039135997295379636, 'td_error': 20.20030401369347, 'value_scale': 44.771206556888856, 'discounted_advantage': -62.256289885766826, 'initial_state': 38.96656799316406, 'diff_eval': 14889.095526571933} step=69000
2025-12-06 20:13.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_69000.d3


Epoch 70/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.91it/s, critic_loss=123, actor_loss=-2.41, dc_loss=0.0504]


2025-12-06 20:14.30 [info     ] PRDC_20251206192337: epoch=70 step=70000 epoch=70 metrics={'time_sample_batch': 0.005117150545120239, 'time_algorithm_update': 0.034231966257095334, 'critic_loss': 122.89689834594726, 'actor_loss': -2.4104125304222106, 'dc_loss': 0.050392459042370316, 'time_step': 0.03961499547958374, 'td_error': 21.94251287007807, 'value_scale': 46.60247357869138, 'discounted_advantage': -64.98821269134118, 'initial_state': 39.874595642089844, 'diff_eval': 15852.1023902061} step=70000
2025-12-06 20:14.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_70000.d3


Epoch 71/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.15it/s, critic_loss=138, actor_loss=-2.41, dc_loss=0.0484]


2025-12-06 20:15.14 [info     ] PRDC_20251206192337: epoch=71 step=71000 epoch=71 metrics={'time_sample_batch': 0.005121121168136597, 'time_algorithm_update': 0.033838746786117556, 'critic_loss': 138.57066274261476, 'actor_loss': -2.4116196060180664, 'dc_loss': 0.048438729915767906, 'time_step': 0.03922562861442566, 'td_error': 20.741359819094242, 'value_scale': 48.27098249274322, 'discounted_advantage': -64.84508995848358, 'initial_state': 43.19475555419922, 'diff_eval': 14053.343934925642} step=71000
2025-12-06 20:15.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_71000.d3


Epoch 72/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.19it/s, critic_loss=151, actor_loss=-2.41, dc_loss=0.0482]


2025-12-06 20:15.57 [info     ] PRDC_20251206192337: epoch=72 step=72000 epoch=72 metrics={'time_sample_batch': 0.005052270650863647, 'time_algorithm_update': 0.03385408496856689, 'critic_loss': 150.4007459869385, 'actor_loss': -2.4101793704032897, 'dc_loss': 0.04827287396043539, 'time_step': 0.039170965671539305, 'td_error': 24.269525411574968, 'value_scale': 50.1452436345346, 'discounted_advantage': -70.48338741094159, 'initial_state': 42.76717758178711, 'diff_eval': 15643.76389479126} step=72000
2025-12-06 20:15.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_72000.d3


Epoch 73/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.09it/s, critic_loss=169, actor_loss=-2.4, dc_loss=0.0511]


2025-12-06 20:16.40 [info     ] PRDC_20251206192337: epoch=73 step=73000 epoch=73 metrics={'time_sample_batch': 0.005065435647964477, 'time_algorithm_update': 0.03398017597198486, 'critic_loss': 168.87349920654296, 'actor_loss': -2.405181872844696, 'dc_loss': 0.05104986296221614, 'time_step': 0.03931769585609436, 'td_error': 30.82899853195709, 'value_scale': 53.38505297221637, 'discounted_advantage': -75.4086601488593, 'initial_state': 43.777976989746094, 'diff_eval': 16545.70514758534} step=73000
2025-12-06 20:16.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_73000.d3


Epoch 74/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.13it/s, critic_loss=191, actor_loss=-2.4, dc_loss=0.052] 


2025-12-06 20:17.23 [info     ] PRDC_20251206192337: epoch=74 step=74000 epoch=74 metrics={'time_sample_batch': 0.004981392621994018, 'time_algorithm_update': 0.034027482748031614, 'critic_loss': 191.44300749206542, 'actor_loss': -2.4020937428474425, 'dc_loss': 0.05207949239760637, 'time_step': 0.039262950658798215, 'td_error': 31.051518017750666, 'value_scale': 55.32398281163362, 'discounted_advantage': -76.2836872145746, 'initial_state': 42.43231964111328, 'diff_eval': 14532.12921361592} step=74000
2025-12-06 20:17.23 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_74000.d3


Epoch 75/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.28it/s, critic_loss=221, actor_loss=-2.4, dc_loss=0.0479]


2025-12-06 20:18.06 [info     ] PRDC_20251206192337: epoch=75 step=75000 epoch=75 metrics={'time_sample_batch': 0.004969717502593994, 'time_algorithm_update': 0.03382331109046936, 'critic_loss': 220.89595950317383, 'actor_loss': -2.402721210002899, 'dc_loss': 0.04784772875159979, 'time_step': 0.03904326581954956, 'td_error': 30.661869667992654, 'value_scale': 58.49562545950085, 'discounted_advantage': -79.05046135125517, 'initial_state': 47.04443359375, 'diff_eval': 13745.429765243658} step=75000
2025-12-06 20:18.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_75000.d3


Epoch 76/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.20it/s, critic_loss=246, actor_loss=-2.4, dc_loss=0.0516]


2025-12-06 20:18.49 [info     ] PRDC_20251206192337: epoch=76 step=76000 epoch=76 metrics={'time_sample_batch': 0.004981515645980835, 'time_algorithm_update': 0.033944389343261716, 'critic_loss': 246.52028497314453, 'actor_loss': -2.3966417083740232, 'dc_loss': 0.0516054121516645, 'time_step': 0.03917842602729797, 'td_error': 40.62844774115098, 'value_scale': 64.27941191875925, 'discounted_advantage': -86.36221912434313, 'initial_state': 51.967857360839844, 'diff_eval': 17536.395202957392} step=76000
2025-12-06 20:18.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_76000.d3


Epoch 77/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.90it/s, critic_loss=273, actor_loss=-2.39, dc_loss=0.0511]


2025-12-06 20:19.33 [info     ] PRDC_20251206192337: epoch=77 step=77000 epoch=77 metrics={'time_sample_batch': 0.00497717547416687, 'time_algorithm_update': 0.03440721750259399, 'critic_loss': 273.27709175872803, 'actor_loss': -2.3942744388580324, 'dc_loss': 0.05103036668896675, 'time_step': 0.03963532018661499, 'td_error': 54.77021246054121, 'value_scale': 68.75237904121195, 'discounted_advantage': -100.75661489886394, 'initial_state': 50.53361511230469, 'diff_eval': 15650.873986132028} step=77000
2025-12-06 20:19.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_77000.d3


Epoch 78/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.28it/s, critic_loss=322, actor_loss=-2.39, dc_loss=0.0475]


2025-12-06 20:20.16 [info     ] PRDC_20251206192337: epoch=78 step=78000 epoch=78 metrics={'time_sample_batch': 0.0048959672451019285, 'time_algorithm_update': 0.033884544134140014, 'critic_loss': 322.25873709869387, 'actor_loss': -2.393893558502197, 'dc_loss': 0.047477931044995786, 'time_step': 0.03903760242462158, 'td_error': 60.22704191358249, 'value_scale': 76.36956410694552, 'discounted_advantage': -109.32317466584612, 'initial_state': 55.17491149902344, 'diff_eval': 15716.878420467086} step=78000
2025-12-06 20:20.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_78000.d3


Epoch 79/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.10it/s, critic_loss=379, actor_loss=-2.39, dc_loss=0.0474]


2025-12-06 20:20.59 [info     ] PRDC_20251206192337: epoch=79 step=79000 epoch=79 metrics={'time_sample_batch': 0.004985865354537964, 'time_algorithm_update': 0.03407146430015564, 'critic_loss': 379.4343972930908, 'actor_loss': -2.3882845220565794, 'dc_loss': 0.047465221494436265, 'time_step': 0.03931066751480103, 'td_error': 74.00462361998834, 'value_scale': 84.69971777040321, 'discounted_advantage': -114.54287671653904, 'initial_state': 60.639041900634766, 'diff_eval': 14806.251020562566} step=79000
2025-12-06 20:21.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_79000.d3


Epoch 80/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.57it/s, critic_loss=454, actor_loss=-2.39, dc_loss=0.0475]


2025-12-06 20:21.45 [info     ] PRDC_20251206192337: epoch=80 step=80000 epoch=80 metrics={'time_sample_batch': 0.005007776260375976, 'time_algorithm_update': 0.03663059377670288, 'critic_loss': 454.95766654968264, 'actor_loss': -2.3853167824745176, 'dc_loss': 0.0474446374848485, 'time_step': 0.04189761781692505, 'td_error': 85.33456660397006, 'value_scale': 91.61640425432358, 'discounted_advantage': -128.44416894051565, 'initial_state': 61.407413482666016, 'diff_eval': 13461.069311040788} step=80000
2025-12-06 20:21.45 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_80000.d3


Epoch 81/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.19it/s, critic_loss=557, actor_loss=-2.39, dc_loss=0.0443]


2025-12-06 20:22.28 [info     ] PRDC_20251206192337: epoch=81 step=81000 epoch=81 metrics={'time_sample_batch': 0.005071014404296875, 'time_algorithm_update': 0.0338803174495697, 'critic_loss': 558.1203886566162, 'actor_loss': -2.3877369809150695, 'dc_loss': 0.04425755681842566, 'time_step': 0.03919880747795105, 'td_error': 117.26581107263644, 'value_scale': 99.6765737369694, 'discounted_advantage': -142.44511781501993, 'initial_state': 63.825294494628906, 'diff_eval': 13660.80714806499} step=81000
2025-12-06 20:22.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_81000.d3


Epoch 82/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.12it/s, critic_loss=662, actor_loss=-2.39, dc_loss=0.0435]


2025-12-06 20:23.11 [info     ] PRDC_20251206192337: epoch=82 step=82000 epoch=82 metrics={'time_sample_batch': 0.0050743274688720705, 'time_algorithm_update': 0.03394465136528015, 'critic_loss': 662.585681640625, 'actor_loss': -2.3916656074523925, 'dc_loss': 0.04349959022924304, 'time_step': 0.03928053784370422, 'td_error': 135.26144905398343, 'value_scale': 109.99131769831203, 'discounted_advantage': -158.1260480669335, 'initial_state': 71.86050415039062, 'diff_eval': 14697.336080687784} step=82000
2025-12-06 20:23.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_82000.d3


Epoch 83/200: 100%|██████████| 1000/1000 [00:40<00:00, 25.00it/s, critic_loss=767, actor_loss=-2.39, dc_loss=0.0443] 


2025-12-06 20:23.55 [info     ] PRDC_20251206192337: epoch=83 step=83000 epoch=83 metrics={'time_sample_batch': 0.005159587860107422, 'time_algorithm_update': 0.03405455708503723, 'critic_loss': 767.9359878845215, 'actor_loss': -2.3917017822265625, 'dc_loss': 0.044284469705075026, 'time_step': 0.03948076796531677, 'td_error': 175.64816162094837, 'value_scale': 120.81015010738912, 'discounted_advantage': -181.14471521782477, 'initial_state': 78.90742492675781, 'diff_eval': 14394.626515216023} step=83000
2025-12-06 20:23.55 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_83000.d3


Epoch 84/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.98it/s, critic_loss=881, actor_loss=-2.39, dc_loss=0.0451]


2025-12-06 20:24.38 [info     ] PRDC_20251206192337: epoch=84 step=84000 epoch=84 metrics={'time_sample_batch': 0.005049646615982056, 'time_algorithm_update': 0.03419328570365906, 'critic_loss': 880.0521471252441, 'actor_loss': -2.392635314464569, 'dc_loss': 0.045010654103010896, 'time_step': 0.03950350022315979, 'td_error': 207.18561307011905, 'value_scale': 136.85703497293835, 'discounted_advantage': -200.80959511902353, 'initial_state': 88.67798614501953, 'diff_eval': 15497.813791179859} step=84000
2025-12-06 20:24.38 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_84000.d3


Epoch 85/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.26it/s, critic_loss=1.04e+3, actor_loss=-2.4, dc_loss=0.042]


2025-12-06 20:25.21 [info     ] PRDC_20251206192337: epoch=85 step=85000 epoch=85 metrics={'time_sample_batch': 0.004968256711959839, 'time_algorithm_update': 0.03383263635635376, 'critic_loss': 1036.9116819152832, 'actor_loss': -2.3959077167510987, 'dc_loss': 0.04194935140386224, 'time_step': 0.03906408786773682, 'td_error': 270.2355130084589, 'value_scale': 155.71741157572183, 'discounted_advantage': -209.68379033256772, 'initial_state': 106.12515258789062, 'diff_eval': 19083.686461036905} step=85000
2025-12-06 20:25.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_85000.d3


Epoch 86/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.99it/s, critic_loss=1.15e+3, actor_loss=-2.4, dc_loss=0.0405]


2025-12-06 20:26.05 [info     ] PRDC_20251206192337: epoch=86 step=86000 epoch=86 metrics={'time_sample_batch': 0.004954320430755615, 'time_algorithm_update': 0.03426064777374267, 'critic_loss': 1153.095538848877, 'actor_loss': -2.399800256729126, 'dc_loss': 0.04042613920196891, 'time_step': 0.03946974086761475, 'td_error': 302.7357313883654, 'value_scale': 173.23786425484346, 'discounted_advantage': -241.59592290953935, 'initial_state': 124.45648193359375, 'diff_eval': 18534.0381578195} step=86000
2025-12-06 20:26.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_86000.d3


Epoch 87/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.99it/s, critic_loss=1.34e+3, actor_loss=-2.4, dc_loss=0.0379]


2025-12-06 20:26.48 [info     ] PRDC_20251206192337: epoch=87 step=87000 epoch=87 metrics={'time_sample_batch': 0.005072120189666748, 'time_algorithm_update': 0.034156313180923464, 'critic_loss': 1343.1830256347657, 'actor_loss': -2.404458722114563, 'dc_loss': 0.03783595011383295, 'time_step': 0.03949259567260742, 'td_error': 357.12278014302854, 'value_scale': 192.20019030710878, 'discounted_advantage': -277.4747943586611, 'initial_state': 141.3245086669922, 'diff_eval': 18989.311464165956} step=87000
2025-12-06 20:26.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_87000.d3


Epoch 88/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.74it/s, critic_loss=1.56e+3, actor_loss=-2.4, dc_loss=0.0378]


2025-12-06 20:27.32 [info     ] PRDC_20251206192337: epoch=88 step=88000 epoch=88 metrics={'time_sample_batch': 0.00515579891204834, 'time_algorithm_update': 0.03445399928092956, 'critic_loss': 1562.8435463867188, 'actor_loss': -2.4041450066566465, 'dc_loss': 0.037814673114567994, 'time_step': 0.03988306283950806, 'td_error': 461.3043564294915, 'value_scale': 213.2229335451466, 'discounted_advantage': -313.32317154261636, 'initial_state': 155.6087646484375, 'diff_eval': 18339.836559684427} step=88000
2025-12-06 20:27.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_88000.d3


Epoch 89/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.70it/s, critic_loss=1.83e+3, actor_loss=-2.41, dc_loss=0.0332]


2025-12-06 20:28.16 [info     ] PRDC_20251206192337: epoch=89 step=89000 epoch=89 metrics={'time_sample_batch': 0.005132126808166504, 'time_algorithm_update': 0.03453223013877869, 'critic_loss': 1830.5856543579102, 'actor_loss': -2.4071152353286744, 'dc_loss': 0.033255280919373034, 'time_step': 0.039928011894226074, 'td_error': 444.90115755429224, 'value_scale': 222.8936740616391, 'discounted_advantage': -310.4481041359933, 'initial_state': 158.5149383544922, 'diff_eval': 11797.730842638304} step=89000
2025-12-06 20:28.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_89000.d3


Epoch 90/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.26it/s, critic_loss=2.06e+3, actor_loss=-2.41, dc_loss=0.0311]


2025-12-06 20:28.59 [info     ] PRDC_20251206192337: epoch=90 step=90000 epoch=90 metrics={'time_sample_batch': 0.005029472827911377, 'time_algorithm_update': 0.03380450987815857, 'critic_loss': 2064.750111633301, 'actor_loss': -2.404928842544556, 'dc_loss': 0.031134221039712428, 'time_step': 0.03907954454421997, 'td_error': 505.63772795796814, 'value_scale': 236.43231366873286, 'discounted_advantage': -327.23654982142483, 'initial_state': 168.1737518310547, 'diff_eval': 10395.364396041854} step=90000
2025-12-06 20:28.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_90000.d3


Epoch 91/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.14it/s, critic_loss=2.22e+3, actor_loss=-2.41, dc_loss=0.0312]


2025-12-06 20:29.42 [info     ] PRDC_20251206192337: epoch=91 step=91000 epoch=91 metrics={'time_sample_batch': 0.005067532300949097, 'time_algorithm_update': 0.03392653250694275, 'critic_loss': 2222.9217450561523, 'actor_loss': -2.4062491941452024, 'dc_loss': 0.031236783027648925, 'time_step': 0.03925259208679199, 'td_error': 481.989344777627, 'value_scale': 252.95504086497743, 'discounted_advantage': -325.9508654809165, 'initial_state': 192.74432373046875, 'diff_eval': 9443.193679669657} step=91000
2025-12-06 20:29.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_91000.d3


Epoch 92/200: 100%|██████████| 1000/1000 [00:43<00:00, 23.06it/s, critic_loss=2.37e+3, actor_loss=-2.4, dc_loss=0.0319]


2025-12-06 20:30.29 [info     ] PRDC_20251206192337: epoch=92 step=92000 epoch=92 metrics={'time_sample_batch': 0.005326472043991089, 'time_algorithm_update': 0.03720312929153442, 'critic_loss': 2373.2395213012696, 'actor_loss': -2.40462362575531, 'dc_loss': 0.031902166813611986, 'time_step': 0.0427898964881897, 'td_error': 561.3136466847692, 'value_scale': 266.2721078718973, 'discounted_advantage': -356.33853497860673, 'initial_state': 197.46742248535156, 'diff_eval': 9809.950987605735} step=92000
2025-12-06 20:30.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_92000.d3


Epoch 93/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.57it/s, critic_loss=2.52e+3, actor_loss=-2.41, dc_loss=0.0314]


2025-12-06 20:31.13 [info     ] PRDC_20251206192337: epoch=93 step=93000 epoch=93 metrics={'time_sample_batch': 0.005067902803421021, 'time_algorithm_update': 0.03481151223182678, 'critic_loss': 2529.143621520996, 'actor_loss': -2.4051772837638854, 'dc_loss': 0.03141829109564424, 'time_step': 0.040137651681900026, 'td_error': 623.6802778632352, 'value_scale': 280.7308577155977, 'discounted_advantage': -390.2265108896407, 'initial_state': 211.1046142578125, 'diff_eval': 11577.486443848733} step=93000
2025-12-06 20:31.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_93000.d3


Epoch 94/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.85it/s, critic_loss=2.81e+3, actor_loss=-2.41, dc_loss=0.0301]


2025-12-06 20:31.57 [info     ] PRDC_20251206192337: epoch=94 step=94000 epoch=94 metrics={'time_sample_batch': 0.005199046134948731, 'time_algorithm_update': 0.0342263343334198, 'critic_loss': 2819.7812416992188, 'actor_loss': -2.40970751619339, 'dc_loss': 0.03016068996489048, 'time_step': 0.03968968081474304, 'td_error': 598.8067950546762, 'value_scale': 293.95776765004786, 'discounted_advantage': -369.78953934125116, 'initial_state': 224.64878845214844, 'diff_eval': 8585.957428441741} step=94000
2025-12-06 20:31.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_94000.d3


Epoch 95/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.67it/s, critic_loss=3.01e+3, actor_loss=-2.41, dc_loss=0.0288]


2025-12-06 20:32.43 [info     ] PRDC_20251206192337: epoch=95 step=95000 epoch=95 metrics={'time_sample_batch': 0.0050773794651031495, 'time_algorithm_update': 0.03632223129272461, 'critic_loss': 3012.256244445801, 'actor_loss': -2.4124166932106017, 'dc_loss': 0.028804183565080167, 'time_step': 0.041665737628936765, 'td_error': 629.3515112381739, 'value_scale': 302.52489978512193, 'discounted_advantage': -390.23449426748243, 'initial_state': 228.75656127929688, 'diff_eval': 7480.100466271823} step=95000
2025-12-06 20:32.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_95000.d3


Epoch 96/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.19it/s, critic_loss=3.14e+3, actor_loss=-2.42, dc_loss=0.0286]


2025-12-06 20:33.26 [info     ] PRDC_20251206192337: epoch=96 step=96000 epoch=96 metrics={'time_sample_batch': 0.004920151472091675, 'time_algorithm_update': 0.03400242471694946, 'critic_loss': 3139.466535583496, 'actor_loss': -2.41673051738739, 'dc_loss': 0.028572624515742065, 'time_step': 0.039187552213668826, 'td_error': 654.8427620075566, 'value_scale': 315.025240673878, 'discounted_advantage': -395.868207608542, 'initial_state': 242.9866943359375, 'diff_eval': 8860.444051955958} step=96000
2025-12-06 20:33.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_96000.d3


Epoch 97/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.26it/s, critic_loss=3.34e+3, actor_loss=-2.42, dc_loss=0.0294]


2025-12-06 20:34.09 [info     ] PRDC_20251206192337: epoch=97 step=97000 epoch=97 metrics={'time_sample_batch': 0.0049482274055480955, 'time_algorithm_update': 0.03385225296020508, 'critic_loss': 3335.811506347656, 'actor_loss': -2.415466579437256, 'dc_loss': 0.029337149228900673, 'time_step': 0.03906891775131226, 'td_error': 651.6190459241304, 'value_scale': 326.786079775058, 'discounted_advantage': -403.7774414895296, 'initial_state': 247.33026123046875, 'diff_eval': 6874.532869150464} step=97000
2025-12-06 20:34.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_97000.d3


Epoch 98/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.56it/s, critic_loss=3.52e+3, actor_loss=-2.41, dc_loss=0.0281]


2025-12-06 20:34.53 [info     ] PRDC_20251206192337: epoch=98 step=98000 epoch=98 metrics={'time_sample_batch': 0.0052058684825897214, 'time_algorithm_update': 0.034695052862167355, 'critic_loss': 3525.80214831543, 'actor_loss': -2.413231576919556, 'dc_loss': 0.028125978082418443, 'time_step': 0.04015685343742371, 'td_error': 632.1612877207707, 'value_scale': 328.3496208513173, 'discounted_advantage': -405.5194084785714, 'initial_state': 252.11630249023438, 'diff_eval': 6814.626415288193} step=98000
2025-12-06 20:34.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_98000.d3


Epoch 99/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.03it/s, critic_loss=3.64e+3, actor_loss=-2.42, dc_loss=0.0285]


2025-12-06 20:35.36 [info     ] PRDC_20251206192337: epoch=99 step=99000 epoch=99 metrics={'time_sample_batch': 0.004960811614990234, 'time_algorithm_update': 0.03417061614990234, 'critic_loss': 3643.6154030151365, 'actor_loss': -2.415635500431061, 'dc_loss': 0.028464466951787473, 'time_step': 0.03939174127578735, 'td_error': 691.9614246023425, 'value_scale': 336.5070671959525, 'discounted_advantage': -419.64215320129637, 'initial_state': 256.3708801269531, 'diff_eval': 7208.007523218634} step=99000
2025-12-06 20:35.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_99000.d3


Epoch 100/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.20it/s, critic_loss=3.63e+3, actor_loss=-2.41, dc_loss=0.0281]


2025-12-06 20:36.20 [info     ] PRDC_20251206192337: epoch=100 step=100000 epoch=100 metrics={'time_sample_batch': 0.004995433330535889, 'time_algorithm_update': 0.03393724918365479, 'critic_loss': 3628.001927734375, 'actor_loss': -2.413158133506775, 'dc_loss': 0.028149281810969113, 'time_step': 0.03917967009544372, 'td_error': 756.3502776814228, 'value_scale': 342.4444373570014, 'discounted_advantage': -436.11012519862925, 'initial_state': 262.8346862792969, 'diff_eval': 8961.361574441935} step=100000
2025-12-06 20:36.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_100000.d3


Epoch 101/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.16it/s, critic_loss=3.53e+3, actor_loss=-2.41, dc_loss=0.0278]


2025-12-06 20:37.03 [info     ] PRDC_20251206192337: epoch=101 step=101000 epoch=101 metrics={'time_sample_batch': 0.0049853272438049314, 'time_algorithm_update': 0.033960051774978635, 'critic_loss': 3529.006765930176, 'actor_loss': -2.411725812911987, 'dc_loss': 0.027777827337384223, 'time_step': 0.039200970888137815, 'td_error': 632.2960304283905, 'value_scale': 344.0398687439013, 'discounted_advantage': -391.1717513987178, 'initial_state': 270.4354553222656, 'diff_eval': 6957.294034739353} step=101000
2025-12-06 20:37.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_101000.d3


Epoch 102/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.72it/s, critic_loss=3.62e+3, actor_loss=-2.41, dc_loss=0.0285]


2025-12-06 20:37.47 [info     ] PRDC_20251206192337: epoch=102 step=102000 epoch=102 metrics={'time_sample_batch': 0.00510502552986145, 'time_algorithm_update': 0.034512669086456296, 'critic_loss': 3618.6914099731443, 'actor_loss': -2.4099993000030517, 'dc_loss': 0.028512460477650166, 'time_step': 0.0398928554058075, 'td_error': 642.6013360368568, 'value_scale': 344.13873753115246, 'discounted_advantage': -405.91467098395407, 'initial_state': 269.6935119628906, 'diff_eval': 9013.89369775905} step=102000
2025-12-06 20:37.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_102000.d3


Epoch 103/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.18it/s, critic_loss=3.47e+3, actor_loss=-2.41, dc_loss=0.03] 


2025-12-06 20:38.30 [info     ] PRDC_20251206192337: epoch=103 step=103000 epoch=103 metrics={'time_sample_batch': 0.004943245649337768, 'time_algorithm_update': 0.03397140049934387, 'critic_loss': 3463.337149658203, 'actor_loss': -2.4055440764427187, 'dc_loss': 0.03004584851115942, 'time_step': 0.039178945064544675, 'td_error': 580.6021012222857, 'value_scale': 345.8212414162307, 'discounted_advantage': -415.4374675530644, 'initial_state': 279.3613586425781, 'diff_eval': 7481.691435655159} step=103000
2025-12-06 20:38.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_103000.d3


Epoch 104/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.27it/s, critic_loss=3.44e+3, actor_loss=-2.4, dc_loss=0.0303]


2025-12-06 20:39.13 [info     ] PRDC_20251206192337: epoch=104 step=104000 epoch=104 metrics={'time_sample_batch': 0.004929034948348999, 'time_algorithm_update': 0.03387645244598389, 'critic_loss': 3433.311692138672, 'actor_loss': -2.3993446702957155, 'dc_loss': 0.030313716035336256, 'time_step': 0.03906511211395264, 'td_error': 545.200141946314, 'value_scale': 346.31146873747315, 'discounted_advantage': -409.4699071348908, 'initial_state': 279.23748779296875, 'diff_eval': 8376.413565085633} step=104000
2025-12-06 20:39.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_104000.d3


Epoch 105/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.04it/s, critic_loss=3.29e+3, actor_loss=-2.4, dc_loss=0.0313]


2025-12-06 20:39.56 [info     ] PRDC_20251206192337: epoch=105 step=105000 epoch=105 metrics={'time_sample_batch': 0.005034657478332519, 'time_algorithm_update': 0.03409599733352661, 'critic_loss': 3284.446328979492, 'actor_loss': -2.395512327194214, 'dc_loss': 0.031267301820218564, 'time_step': 0.039393258094787595, 'td_error': 521.7133512465747, 'value_scale': 339.133487166523, 'discounted_advantage': -413.9863097571155, 'initial_state': 277.1986389160156, 'diff_eval': 7619.869997418265} step=105000
2025-12-06 20:39.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_105000.d3


Epoch 106/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.32it/s, critic_loss=3.24e+3, actor_loss=-2.39, dc_loss=0.034]


2025-12-06 20:40.39 [info     ] PRDC_20251206192337: epoch=106 step=106000 epoch=106 metrics={'time_sample_batch': 0.004965397596359253, 'time_algorithm_update': 0.033792658567428586, 'critic_loss': 3227.6255239868165, 'actor_loss': -2.3925173325538633, 'dc_loss': 0.03399840265512467, 'time_step': 0.03899253463745117, 'td_error': 475.51947876523, 'value_scale': 337.4032423641136, 'discounted_advantage': -384.16020162933086, 'initial_state': 280.8578796386719, 'diff_eval': 8911.006385776021} step=106000
2025-12-06 20:40.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_106000.d3


Epoch 107/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.90it/s, critic_loss=3.04e+3, actor_loss=-2.39, dc_loss=0.0341]


2025-12-06 20:41.23 [info     ] PRDC_20251206192337: epoch=107 step=107000 epoch=107 metrics={'time_sample_batch': 0.005050447463989258, 'time_algorithm_update': 0.03432943153381347, 'critic_loss': 3042.1109158325194, 'actor_loss': -2.3884605026245116, 'dc_loss': 0.034094098538160325, 'time_step': 0.039626402139663695, 'td_error': 488.6924661411491, 'value_scale': 329.30638531617325, 'discounted_advantage': -410.74023937557916, 'initial_state': 270.38226318359375, 'diff_eval': 8960.353786911355} step=107000
2025-12-06 20:41.23 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_107000.d3


Epoch 108/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.67it/s, critic_loss=2.84e+3, actor_loss=-2.38, dc_loss=0.0329]


2025-12-06 20:42.07 [info     ] PRDC_20251206192337: epoch=108 step=108000 epoch=108 metrics={'time_sample_batch': 0.005169328451156616, 'time_algorithm_update': 0.03450052547454834, 'critic_loss': 2836.3306541748047, 'actor_loss': -2.377084620475769, 'dc_loss': 0.03291964774206281, 'time_step': 0.039949482679367064, 'td_error': 416.5260093160948, 'value_scale': 324.8892382610174, 'discounted_advantage': -382.21300035715933, 'initial_state': 270.8935852050781, 'diff_eval': 8255.704604296743} step=108000
2025-12-06 20:42.07 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_108000.d3


Epoch 109/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.97it/s, critic_loss=2.55e+3, actor_loss=-2.37, dc_loss=0.0306]


2025-12-06 20:42.50 [info     ] PRDC_20251206192337: epoch=109 step=109000 epoch=109 metrics={'time_sample_batch': 0.004926952123641967, 'time_algorithm_update': 0.03436809754371643, 'critic_loss': 2552.1588036499024, 'actor_loss': -2.366709488391876, 'dc_loss': 0.030638181146234275, 'time_step': 0.0395439236164093, 'td_error': 400.4969897616494, 'value_scale': 323.6368811312385, 'discounted_advantage': -379.598920522492, 'initial_state': 276.7578125, 'diff_eval': 9739.388426717598} step=109000
2025-12-06 20:42.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_109000.d3


Epoch 110/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.28it/s, critic_loss=2.4e+3, actor_loss=-2.36, dc_loss=0.0313]


2025-12-06 20:43.35 [info     ] PRDC_20251206192337: epoch=110 step=110000 epoch=110 metrics={'time_sample_batch': 0.005314810037612915, 'time_algorithm_update': 0.03500993013381958, 'critic_loss': 2394.88470111084, 'actor_loss': -2.3638292684555053, 'dc_loss': 0.03133851446211338, 'time_step': 0.04060661435127258, 'td_error': 414.89272281657605, 'value_scale': 318.90867975236785, 'discounted_advantage': -373.7489080417177, 'initial_state': 267.6405029296875, 'diff_eval': 8766.678835341994} step=110000
2025-12-06 20:43.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_110000.d3


Epoch 111/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.61it/s, critic_loss=2.35e+3, actor_loss=-2.36, dc_loss=0.0318]


2025-12-06 20:44.21 [info     ] PRDC_20251206192337: epoch=111 step=111000 epoch=111 metrics={'time_sample_batch': 0.005134140491485596, 'time_algorithm_update': 0.03638160395622254, 'critic_loss': 2353.745650085449, 'actor_loss': -2.3616085553169253, 'dc_loss': 0.03180254169180989, 'time_step': 0.041802278280258176, 'td_error': 353.6419031841537, 'value_scale': 314.9049827196672, 'discounted_advantage': -360.02637271559627, 'initial_state': 275.9666748046875, 'diff_eval': 8301.180970355037} step=111000
2025-12-06 20:44.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_111000.d3


Epoch 112/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.05it/s, critic_loss=2.2e+3, actor_loss=-2.36, dc_loss=0.033] 


2025-12-06 20:45.06 [info     ] PRDC_20251206192337: epoch=112 step=112000 epoch=112 metrics={'time_sample_batch': 0.005691475868225097, 'time_algorithm_update': 0.0350096480846405, 'critic_loss': 2205.925591369629, 'actor_loss': -2.3608621468544007, 'dc_loss': 0.03303034509345889, 'time_step': 0.04099097442626953, 'td_error': 296.80704620274736, 'value_scale': 304.0196645250864, 'discounted_advantage': -331.73612593064854, 'initial_state': 274.3196105957031, 'diff_eval': 8015.421096771742} step=112000
2025-12-06 20:45.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_112000.d3


Epoch 113/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.14it/s, critic_loss=2.1e+3, actor_loss=-2.36, dc_loss=0.0344]


2025-12-06 20:45.49 [info     ] PRDC_20251206192337: epoch=113 step=113000 epoch=113 metrics={'time_sample_batch': 0.005030627965927124, 'time_algorithm_update': 0.03397233462333679, 'critic_loss': 2103.7635482177734, 'actor_loss': -2.3596935081481933, 'dc_loss': 0.034446474723517896, 'time_step': 0.03926016283035278, 'td_error': 283.42860124163246, 'value_scale': 292.90677786053277, 'discounted_advantage': -327.81941775529543, 'initial_state': 262.2880554199219, 'diff_eval': 9805.404487818216} step=113000
2025-12-06 20:45.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_113000.d3


Epoch 114/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.95it/s, critic_loss=1.92e+3, actor_loss=-2.36, dc_loss=0.0364]


2025-12-06 20:46.32 [info     ] PRDC_20251206192337: epoch=114 step=114000 epoch=114 metrics={'time_sample_batch': 0.005097708225250244, 'time_algorithm_update': 0.03416919136047363, 'critic_loss': 1924.758755126953, 'actor_loss': -2.361698152542114, 'dc_loss': 0.03636778701096773, 'time_step': 0.039537519216537476, 'td_error': 227.15021041177926, 'value_scale': 284.8049698372099, 'discounted_advantage': -321.3464973131933, 'initial_state': 263.1620788574219, 'diff_eval': 9672.765588006228} step=114000
2025-12-06 20:46.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_114000.d3


Epoch 115/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.19it/s, critic_loss=1.77e+3, actor_loss=-2.35, dc_loss=0.0363]


2025-12-06 20:47.15 [info     ] PRDC_20251206192337: epoch=115 step=115000 epoch=115 metrics={'time_sample_batch': 0.005032629489898682, 'time_algorithm_update': 0.03388888144493103, 'critic_loss': 1764.1495415344239, 'actor_loss': -2.354147002696991, 'dc_loss': 0.03627382070571184, 'time_step': 0.03916878890991211, 'td_error': 210.19726598249986, 'value_scale': 279.00486372532407, 'discounted_advantage': -319.8692115231122, 'initial_state': 261.86083984375, 'diff_eval': 10562.27836798316} step=115000
2025-12-06 20:47.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_115000.d3


Epoch 116/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.97it/s, critic_loss=1.55e+3, actor_loss=-2.35, dc_loss=0.0372]


2025-12-06 20:47.59 [info     ] PRDC_20251206192337: epoch=116 step=116000 epoch=116 metrics={'time_sample_batch': 0.005042250871658325, 'time_algorithm_update': 0.03421290111541748, 'critic_loss': 1547.568407470703, 'actor_loss': -2.35375253200531, 'dc_loss': 0.03716614911705256, 'time_step': 0.03951262450218201, 'td_error': 200.829670913214, 'value_scale': 268.6591023742553, 'discounted_advantage': -285.8535623898928, 'initial_state': 250.89305114746094, 'diff_eval': 9379.716109648829} step=116000
2025-12-06 20:47.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_116000.d3


Epoch 117/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.28it/s, critic_loss=1.52e+3, actor_loss=-2.35, dc_loss=0.0367]


2025-12-06 20:48.42 [info     ] PRDC_20251206192337: epoch=117 step=117000 epoch=117 metrics={'time_sample_batch': 0.004970246791839599, 'time_algorithm_update': 0.0337913019657135, 'critic_loss': 1521.798244720459, 'actor_loss': -2.3473913021087647, 'dc_loss': 0.036700861934572455, 'time_step': 0.03902671504020691, 'td_error': 180.9596385454791, 'value_scale': 254.54823194698986, 'discounted_advantage': -257.1943006397445, 'initial_state': 242.35479736328125, 'diff_eval': 8823.79857991123} step=117000
2025-12-06 20:48.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_117000.d3


Epoch 118/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.35it/s, critic_loss=1.45e+3, actor_loss=-2.34, dc_loss=0.0367]


2025-12-06 20:49.25 [info     ] PRDC_20251206192337: epoch=118 step=118000 epoch=118 metrics={'time_sample_batch': 0.004952167272567749, 'time_algorithm_update': 0.03375021982192993, 'critic_loss': 1444.7513621215821, 'actor_loss': -2.3439119873046876, 'dc_loss': 0.03664173354208469, 'time_step': 0.03895038366317749, 'td_error': 164.82788147886905, 'value_scale': 242.70910968901924, 'discounted_advantage': -260.02204530750475, 'initial_state': 234.34954833984375, 'diff_eval': 10718.462994207375} step=118000
2025-12-06 20:49.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_118000.d3


Epoch 119/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.30it/s, critic_loss=1.31e+3, actor_loss=-2.34, dc_loss=0.0375]


2025-12-06 20:50.08 [info     ] PRDC_20251206192337: epoch=119 step=119000 epoch=119 metrics={'time_sample_batch': 0.0049846994876861575, 'time_algorithm_update': 0.033776894092559816, 'critic_loss': 1309.27083404541, 'actor_loss': -2.337562770843506, 'dc_loss': 0.03748911714553833, 'time_step': 0.03901297426223755, 'td_error': 156.57033708273988, 'value_scale': 228.49408730559756, 'discounted_advantage': -235.83436776306795, 'initial_state': 223.56419372558594, 'diff_eval': 10104.230395424316} step=119000
2025-12-06 20:50.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_119000.d3


Epoch 120/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.16it/s, critic_loss=1.12e+3, actor_loss=-2.34, dc_loss=0.0381]


2025-12-06 20:50.51 [info     ] PRDC_20251206192337: epoch=120 step=120000 epoch=120 metrics={'time_sample_batch': 0.004960589647293091, 'time_algorithm_update': 0.0340233051776886, 'critic_loss': 1115.3717707519531, 'actor_loss': -2.3373377928733827, 'dc_loss': 0.03811618226766586, 'time_step': 0.03923554468154907, 'td_error': 141.69901237711258, 'value_scale': 218.81324187530655, 'discounted_advantage': -224.44581362830027, 'initial_state': 217.44190979003906, 'diff_eval': 10555.456112199587} step=120000
2025-12-06 20:50.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_120000.d3


Epoch 121/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.25it/s, critic_loss=1.04e+3, actor_loss=-2.33, dc_loss=0.039]


2025-12-06 20:51.34 [info     ] PRDC_20251206192337: epoch=121 step=121000 epoch=121 metrics={'time_sample_batch': 0.005004555702209473, 'time_algorithm_update': 0.03381154823303223, 'critic_loss': 1036.2640408935547, 'actor_loss': -2.326831760406494, 'dc_loss': 0.038976437244564295, 'time_step': 0.0390645592212677, 'td_error': 152.2034324446589, 'value_scale': 205.1026288522509, 'discounted_advantage': -232.5931862852726, 'initial_state': 202.4629364013672, 'diff_eval': 13900.541807611351} step=121000
2025-12-06 20:51.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_121000.d3


Epoch 122/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.72it/s, critic_loss=877, actor_loss=-2.32, dc_loss=0.0393] 


2025-12-06 20:52.18 [info     ] PRDC_20251206192337: epoch=122 step=122000 epoch=122 metrics={'time_sample_batch': 0.004996180772781372, 'time_algorithm_update': 0.03467944550514221, 'critic_loss': 881.5087712402344, 'actor_loss': -2.3208056015968324, 'dc_loss': 0.03930477595701814, 'time_step': 0.039926676750183104, 'td_error': 132.73490588160448, 'value_scale': 192.64117225635508, 'discounted_advantage': -208.08727390345408, 'initial_state': 192.78530883789062, 'diff_eval': 12534.427151379025} step=122000
2025-12-06 20:52.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_122000.d3


Epoch 123/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.98it/s, critic_loss=845, actor_loss=-2.31, dc_loss=0.0403]


2025-12-06 20:53.02 [info     ] PRDC_20251206192337: epoch=123 step=123000 epoch=123 metrics={'time_sample_batch': 0.004976388931274414, 'time_algorithm_update': 0.03429418778419495, 'critic_loss': 842.8455293273926, 'actor_loss': -2.3112395157814025, 'dc_loss': 0.04024287925288081, 'time_step': 0.03951808476448059, 'td_error': 114.35322634979656, 'value_scale': 185.11301913322868, 'discounted_advantage': -193.40832463498296, 'initial_state': 188.89251708984375, 'diff_eval': 12187.974126330188} step=123000
2025-12-06 20:53.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_123000.d3


Epoch 124/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.92it/s, critic_loss=788, actor_loss=-2.3, dc_loss=0.0419]


2025-12-06 20:53.45 [info     ] PRDC_20251206192337: epoch=124 step=124000 epoch=124 metrics={'time_sample_batch': 0.004975218296051026, 'time_algorithm_update': 0.034387949228286745, 'critic_loss': 790.2841764526368, 'actor_loss': -2.3013144898414613, 'dc_loss': 0.041899652145802976, 'time_step': 0.03961394429206848, 'td_error': 133.24208554482223, 'value_scale': 175.30582063081178, 'discounted_advantage': -185.5840612898697, 'initial_state': 171.6951141357422, 'diff_eval': 12680.146969697802} step=124000
2025-12-06 20:53.45 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_124000.d3


Epoch 125/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.68it/s, critic_loss=663, actor_loss=-2.3, dc_loss=0.0414]


2025-12-06 20:54.29 [info     ] PRDC_20251206192337: epoch=125 step=125000 epoch=125 metrics={'time_sample_batch': 0.005158429861068726, 'time_algorithm_update': 0.03458825945854187, 'critic_loss': 661.3129359436035, 'actor_loss': -2.3020671944618227, 'dc_loss': 0.041424537062644956, 'time_step': 0.04000571727752686, 'td_error': 141.10364749523885, 'value_scale': 169.00679759652496, 'discounted_advantage': -198.41360336418978, 'initial_state': 162.16925048828125, 'diff_eval': 16285.120896347136} step=125000
2025-12-06 20:54.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_125000.d3


Epoch 126/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.04it/s, critic_loss=623, actor_loss=-2.31, dc_loss=0.0412]


2025-12-06 20:55.12 [info     ] PRDC_20251206192337: epoch=126 step=126000 epoch=126 metrics={'time_sample_batch': 0.0050122618675231935, 'time_algorithm_update': 0.03417117977142334, 'critic_loss': 621.6062053375244, 'actor_loss': -2.3086888589859007, 'dc_loss': 0.04122156235575676, 'time_step': 0.03943348670005798, 'td_error': 125.88540559164677, 'value_scale': 162.43748413745786, 'discounted_advantage': -179.6773738707461, 'initial_state': 156.71502685546875, 'diff_eval': 15998.59553757597} step=126000
2025-12-06 20:55.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_126000.d3


Epoch 127/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.05it/s, critic_loss=584, actor_loss=-2.31, dc_loss=0.0419]


2025-12-06 20:55.56 [info     ] PRDC_20251206192337: epoch=127 step=127000 epoch=127 metrics={'time_sample_batch': 0.005001518726348877, 'time_algorithm_update': 0.034152948141098025, 'critic_loss': 584.0666630401612, 'actor_loss': -2.310950376033783, 'dc_loss': 0.04189796907827258, 'time_step': 0.039399691343307494, 'td_error': 131.29973007505976, 'value_scale': 155.08002392629166, 'discounted_advantage': -182.17144173838503, 'initial_state': 149.68328857421875, 'diff_eval': 16531.672708684855} step=127000
2025-12-06 20:55.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_127000.d3


Epoch 128/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.09it/s, critic_loss=529, actor_loss=-2.32, dc_loss=0.0404]


2025-12-06 20:56.39 [info     ] PRDC_20251206192337: epoch=128 step=128000 epoch=128 metrics={'time_sample_batch': 0.005017491340637207, 'time_algorithm_update': 0.03406101822853089, 'critic_loss': 530.1542826690674, 'actor_loss': -2.320430076122284, 'dc_loss': 0.04043363052606583, 'time_step': 0.03933145093917847, 'td_error': 120.64782500739624, 'value_scale': 150.55112886937195, 'discounted_advantage': -166.27822176736476, 'initial_state': 148.85340881347656, 'diff_eval': 17816.084121998952} step=128000
2025-12-06 20:56.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_128000.d3


Epoch 129/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.71it/s, critic_loss=530, actor_loss=-2.32, dc_loss=0.0406]


2025-12-06 20:57.23 [info     ] PRDC_20251206192337: epoch=129 step=129000 epoch=129 metrics={'time_sample_batch': 0.00515139365196228, 'time_algorithm_update': 0.034487569332122804, 'critic_loss': 529.234712387085, 'actor_loss': -2.322855844497681, 'dc_loss': 0.04060124322772026, 'time_step': 0.039896810054779054, 'td_error': 120.57483960407754, 'value_scale': 140.30930507943123, 'discounted_advantage': -146.25399516677552, 'initial_state': 138.29835510253906, 'diff_eval': 14146.1525914463} step=129000
2025-12-06 20:57.23 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_129000.d3


Epoch 130/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.12it/s, critic_loss=485, actor_loss=-2.33, dc_loss=0.0396]


2025-12-06 20:58.06 [info     ] PRDC_20251206192337: epoch=130 step=130000 epoch=130 metrics={'time_sample_batch': 0.004982391357421875, 'time_algorithm_update': 0.034053303241729733, 'critic_loss': 485.3200071105957, 'actor_loss': -2.325462474346161, 'dc_loss': 0.03959769183024764, 'time_step': 0.039289860486984254, 'td_error': 123.53636368225531, 'value_scale': 130.10665312941495, 'discounted_advantage': -126.36384798771095, 'initial_state': 127.49856567382812, 'diff_eval': 13208.233854751934} step=130000
2025-12-06 20:58.07 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_130000.d3


Epoch 131/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.19it/s, critic_loss=481, actor_loss=-2.32, dc_loss=0.0404]


2025-12-06 20:58.49 [info     ] PRDC_20251206192337: epoch=131 step=131000 epoch=131 metrics={'time_sample_batch': 0.004960146427154541, 'time_algorithm_update': 0.03398874950408935, 'critic_loss': 479.83132202148437, 'actor_loss': -2.321561228275299, 'dc_loss': 0.04041757974401116, 'time_step': 0.03919717955589294, 'td_error': 112.70334196060448, 'value_scale': 121.21297031251304, 'discounted_advantage': -125.13672022663714, 'initial_state': 125.37300109863281, 'diff_eval': 14507.657527447876} step=131000
2025-12-06 20:58.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_131000.d3


Epoch 132/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.96it/s, critic_loss=472, actor_loss=-2.3, dc_loss=0.0427]


2025-12-06 20:59.33 [info     ] PRDC_20251206192337: epoch=132 step=132000 epoch=132 metrics={'time_sample_batch': 0.005076646566390991, 'time_algorithm_update': 0.03423015356063843, 'critic_loss': 471.59129206848144, 'actor_loss': -2.301400677204132, 'dc_loss': 0.042745566692203286, 'time_step': 0.039549298524856565, 'td_error': 104.80251435852331, 'value_scale': 108.45119280277534, 'discounted_advantage': -115.46052272147251, 'initial_state': 115.10504913330078, 'diff_eval': 15991.377837079524} step=132000
2025-12-06 20:59.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_132000.d3


Epoch 133/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.27it/s, critic_loss=422, actor_loss=-2.28, dc_loss=0.0461]


2025-12-06 21:00.16 [info     ] PRDC_20251206192337: epoch=133 step=133000 epoch=133 metrics={'time_sample_batch': 0.0049726772308349605, 'time_algorithm_update': 0.03383521389961243, 'critic_loss': 421.9210760345459, 'actor_loss': -2.27889045715332, 'dc_loss': 0.04612720078974962, 'time_step': 0.039055501461029055, 'td_error': 108.99280562897864, 'value_scale': 97.62488135846593, 'discounted_advantage': -105.69098029075171, 'initial_state': 105.71442413330078, 'diff_eval': 16674.834682447578} step=133000
2025-12-06 21:00.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_133000.d3


Epoch 134/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.17it/s, critic_loss=401, actor_loss=-2.27, dc_loss=0.0516]


2025-12-06 21:00.59 [info     ] PRDC_20251206192337: epoch=134 step=134000 epoch=134 metrics={'time_sample_batch': 0.005011232614517212, 'time_algorithm_update': 0.03396779370307922, 'critic_loss': 400.3164179382324, 'actor_loss': -2.2652467036247255, 'dc_loss': 0.051636879995465276, 'time_step': 0.039218400716781614, 'td_error': 109.0617455432411, 'value_scale': 86.94878649334396, 'discounted_advantage': -98.4025200809931, 'initial_state': 97.9692153930664, 'diff_eval': 21043.21046091314} step=134000
2025-12-06 21:00.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_134000.d3


Epoch 135/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.89it/s, critic_loss=366, actor_loss=-2.24, dc_loss=0.0604]


2025-12-06 21:01.43 [info     ] PRDC_20251206192337: epoch=135 step=135000 epoch=135 metrics={'time_sample_batch': 0.005137757539749145, 'time_algorithm_update': 0.03425115919113159, 'critic_loss': 365.2825288696289, 'actor_loss': -2.242733971595764, 'dc_loss': 0.06045288484543562, 'time_step': 0.039655489683151246, 'td_error': 104.38850816509935, 'value_scale': 78.03613875294221, 'discounted_advantage': -94.85136800905349, 'initial_state': 89.95796966552734, 'diff_eval': 20841.033958973276} step=135000
2025-12-06 21:01.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_135000.d3


Epoch 136/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.09it/s, critic_loss=325, actor_loss=-2.22, dc_loss=0.0658]


2025-12-06 21:02.26 [info     ] PRDC_20251206192337: epoch=136 step=136000 epoch=136 metrics={'time_sample_batch': 0.005049255609512329, 'time_algorithm_update': 0.03404556608200073, 'critic_loss': 324.78204942321776, 'actor_loss': -2.22251242685318, 'dc_loss': 0.06581553861498833, 'time_step': 0.039337733268737796, 'td_error': 119.35529625067522, 'value_scale': 69.9680471809773, 'discounted_advantage': -98.57606617281766, 'initial_state': 78.8195571899414, 'diff_eval': 33618.506844305455} step=136000
2025-12-06 21:02.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_136000.d3


Epoch 137/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.27it/s, critic_loss=323, actor_loss=-2.2, dc_loss=0.067] 


2025-12-06 21:03.11 [info     ] PRDC_20251206192337: epoch=137 step=137000 epoch=137 metrics={'time_sample_batch': 0.0050165469646453855, 'time_algorithm_update': 0.035424579381942746, 'critic_loss': 323.17767113494875, 'actor_loss': -2.2013563194274903, 'dc_loss': 0.06695094184577464, 'time_step': 0.04069552326202393, 'td_error': 106.29827872353123, 'value_scale': 59.526194688585356, 'discounted_advantage': -81.94754638403249, 'initial_state': 69.12877655029297, 'diff_eval': 31550.724399994513} step=137000
2025-12-06 21:03.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_137000.d3


Epoch 138/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.36it/s, critic_loss=298, actor_loss=-2.17, dc_loss=0.0659]


2025-12-06 21:03.57 [info     ] PRDC_20251206192337: epoch=138 step=138000 epoch=138 metrics={'time_sample_batch': 0.005127430438995361, 'time_algorithm_update': 0.03684030866622925, 'critic_loss': 300.70697797393797, 'actor_loss': -2.171035717010498, 'dc_loss': 0.06588699319213628, 'time_step': 0.04225322318077088, 'td_error': 100.29699409355113, 'value_scale': 49.79848062803021, 'discounted_advantage': -74.21683783450506, 'initial_state': 57.76797866821289, 'diff_eval': 30765.51026290323} step=138000
2025-12-06 21:03.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_138000.d3


Epoch 139/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.22it/s, critic_loss=280, actor_loss=-2.15, dc_loss=0.0669]


2025-12-06 21:04.42 [info     ] PRDC_20251206192337: epoch=139 step=139000 epoch=139 metrics={'time_sample_batch': 0.005009057521820068, 'time_algorithm_update': 0.03548154926300049, 'critic_loss': 279.16682337188723, 'actor_loss': -2.149477262020111, 'dc_loss': 0.0668707415536046, 'time_step': 0.04075014281272888, 'td_error': 90.02211693637497, 'value_scale': 45.404651085924975, 'discounted_advantage': -66.04267942420671, 'initial_state': 51.99201202392578, 'diff_eval': 31257.517304150628} step=139000
2025-12-06 21:04.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_139000.d3


Epoch 140/200: 100%|██████████| 1000/1000 [00:44<00:00, 22.56it/s, critic_loss=259, actor_loss=-2.12, dc_loss=0.0699]


2025-12-06 21:05.30 [info     ] PRDC_20251206192337: epoch=140 step=140000 epoch=140 metrics={'time_sample_batch': 0.005371288776397705, 'time_algorithm_update': 0.038078763961791995, 'critic_loss': 258.54705935668943, 'actor_loss': -2.1244341943264007, 'dc_loss': 0.06991754227131605, 'time_step': 0.043719781398773194, 'td_error': 85.5875383245187, 'value_scale': 38.353590551835595, 'discounted_advantage': -64.91833382934387, 'initial_state': 42.91872024536133, 'diff_eval': 35059.0533863228} step=140000
2025-12-06 21:05.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_140000.d3


Epoch 141/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.37it/s, critic_loss=245, actor_loss=-2.09, dc_loss=0.0711]


2025-12-06 21:06.14 [info     ] PRDC_20251206192337: epoch=141 step=141000 epoch=141 metrics={'time_sample_batch': 0.005047165393829346, 'time_algorithm_update': 0.03517418169975281, 'critic_loss': 244.26315753173827, 'actor_loss': -2.0869501359462737, 'dc_loss': 0.0710040684118867, 'time_step': 0.04047389769554138, 'td_error': 86.1474429534749, 'value_scale': 35.33735174857461, 'discounted_advantage': -63.70723899698507, 'initial_state': 41.183528900146484, 'diff_eval': 36751.846898066426} step=141000
2025-12-06 21:06.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_141000.d3


Epoch 142/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.19it/s, critic_loss=235, actor_loss=-2.04, dc_loss=0.0797]


2025-12-06 21:06.57 [info     ] PRDC_20251206192337: epoch=142 step=142000 epoch=142 metrics={'time_sample_batch': 0.004931568145751953, 'time_algorithm_update': 0.03403035688400269, 'critic_loss': 234.1775549621582, 'actor_loss': -2.037434410095215, 'dc_loss': 0.07985102225840092, 'time_step': 0.03921263551712036, 'td_error': 83.72424865030897, 'value_scale': 29.391151420768953, 'discounted_advantage': -60.78146810857516, 'initial_state': 35.573486328125, 'diff_eval': 37895.50253884657} step=142000
2025-12-06 21:06.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_142000.d3


Epoch 143/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.19it/s, critic_loss=224, actor_loss=-1.98, dc_loss=0.101]


2025-12-06 21:07.40 [info     ] PRDC_20251206192337: epoch=143 step=143000 epoch=143 metrics={'time_sample_batch': 0.0049806742668151854, 'time_algorithm_update': 0.03393897390365601, 'critic_loss': 223.7227081451416, 'actor_loss': -1.9842180414199828, 'dc_loss': 0.10092142409086227, 'time_step': 0.039168302059173585, 'td_error': 83.12496986260612, 'value_scale': 25.798722870754077, 'discounted_advantage': -61.3410820004447, 'initial_state': 24.741622924804688, 'diff_eval': 35929.15089445649} step=143000
2025-12-06 21:07.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_143000.d3


Epoch 144/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.26it/s, critic_loss=224, actor_loss=-1.95, dc_loss=0.107]


2025-12-06 21:08.23 [info     ] PRDC_20251206192337: epoch=144 step=144000 epoch=144 metrics={'time_sample_batch': 0.004898513078689575, 'time_algorithm_update': 0.033937016010284426, 'critic_loss': 222.87995551300048, 'actor_loss': -1.954903558731079, 'dc_loss': 0.106795034840703, 'time_step': 0.03908787107467651, 'td_error': 82.90927216916248, 'value_scale': 23.457631202937673, 'discounted_advantage': -61.89978976448697, 'initial_state': 21.193864822387695, 'diff_eval': 35000.30795558775} step=144000
2025-12-06 21:08.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_144000.d3


Epoch 145/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.82it/s, critic_loss=242, actor_loss=-1.91, dc_loss=0.111]


2025-12-06 21:09.07 [info     ] PRDC_20251206192337: epoch=145 step=145000 epoch=145 metrics={'time_sample_batch': 0.005092460155487061, 'time_algorithm_update': 0.03438689875602722, 'critic_loss': 241.38285011291504, 'actor_loss': -1.9142637124061586, 'dc_loss': 0.11104041956365109, 'time_step': 0.03974363660812378, 'td_error': 76.02587539151342, 'value_scale': 20.0062547715042, 'discounted_advantage': -49.278444435634, 'initial_state': 17.298799514770508, 'diff_eval': 31575.69082459912} step=145000
2025-12-06 21:09.07 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_145000.d3


Epoch 146/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.72it/s, critic_loss=234, actor_loss=-1.86, dc_loss=0.113] 


2025-12-06 21:09.51 [info     ] PRDC_20251206192337: epoch=146 step=146000 epoch=146 metrics={'time_sample_batch': 0.005222749710083008, 'time_algorithm_update': 0.034397658586502074, 'critic_loss': 233.26212902832032, 'actor_loss': -1.860371134519577, 'dc_loss': 0.11327674049139023, 'time_step': 0.03989228415489197, 'td_error': 98.14003400896591, 'value_scale': 17.962228757843736, 'discounted_advantage': -62.53244848542085, 'initial_state': 16.292091369628906, 'diff_eval': 38762.49886536237} step=146000
2025-12-06 21:09.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_146000.d3


Epoch 147/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.17it/s, critic_loss=216, actor_loss=-1.72, dc_loss=0.106]


2025-12-06 21:10.34 [info     ] PRDC_20251206192337: epoch=147 step=147000 epoch=147 metrics={'time_sample_batch': 0.005018296003341674, 'time_algorithm_update': 0.03393381810188294, 'critic_loss': 217.0428014984131, 'actor_loss': -1.721920368909836, 'dc_loss': 0.10647642835974694, 'time_step': 0.03921191692352295, 'td_error': 96.6492255247533, 'value_scale': 17.51236699302958, 'discounted_advantage': -59.532518159757515, 'initial_state': 14.951491355895996, 'diff_eval': 38661.501075736014} step=147000
2025-12-06 21:10.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_147000.d3


Epoch 148/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.02it/s, critic_loss=221, actor_loss=-1.61, dc_loss=0.107]


2025-12-06 21:11.17 [info     ] PRDC_20251206192337: epoch=148 step=148000 epoch=148 metrics={'time_sample_batch': 0.005084576845169067, 'time_algorithm_update': 0.03405987930297852, 'critic_loss': 220.35105508041383, 'actor_loss': -1.6056690862178802, 'dc_loss': 0.106891617462039, 'time_step': 0.03940916585922241, 'td_error': 95.27652863351474, 'value_scale': 13.981747470496336, 'discounted_advantage': -52.04902406765102, 'initial_state': 11.503317832946777, 'diff_eval': 41041.486937628455} step=148000
2025-12-06 21:11.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_148000.d3


Epoch 149/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.02it/s, critic_loss=240, actor_loss=-1.52, dc_loss=0.101]


2025-12-06 21:12.01 [info     ] PRDC_20251206192337: epoch=149 step=149000 epoch=149 metrics={'time_sample_batch': 0.005063275098800659, 'time_algorithm_update': 0.034122605562210084, 'critic_loss': 240.0058659248352, 'actor_loss': -1.5227648577690125, 'dc_loss': 0.10076777148991824, 'time_step': 0.03944406461715698, 'td_error': 111.57017249069246, 'value_scale': 12.409672284550334, 'discounted_advantage': -58.47267006820915, 'initial_state': 8.888309478759766, 'diff_eval': 42334.36939281846} step=149000
2025-12-06 21:12.01 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_149000.d3


Epoch 150/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.04it/s, critic_loss=240, actor_loss=-1.49, dc_loss=0.105]


2025-12-06 21:12.44 [info     ] PRDC_20251206192337: epoch=150 step=150000 epoch=150 metrics={'time_sample_batch': 0.005040540933609009, 'time_algorithm_update': 0.034117602348327634, 'critic_loss': 243.76055766296386, 'actor_loss': -1.492255079627037, 'dc_loss': 0.1049478107392788, 'time_step': 0.03941153430938721, 'td_error': 123.8294705399839, 'value_scale': 11.816790190321601, 'discounted_advantage': -58.88985546848882, 'initial_state': 7.697225570678711, 'diff_eval': 39740.689700575305} step=150000
2025-12-06 21:12.44 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_150000.d3


Epoch 151/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.76it/s, critic_loss=261, actor_loss=-1.46, dc_loss=0.11]


2025-12-06 21:13.28 [info     ] PRDC_20251206192337: epoch=151 step=151000 epoch=151 metrics={'time_sample_batch': 0.005076151371002197, 'time_algorithm_update': 0.03446474504470825, 'critic_loss': 264.7651814498901, 'actor_loss': -1.461285769343376, 'dc_loss': 0.11039242969453335, 'time_step': 0.0398102593421936, 'td_error': 137.7966410795442, 'value_scale': 8.38310895176965, 'discounted_advantage': -60.682126747581364, 'initial_state': 1.4332759380340576, 'diff_eval': 39647.47870463867} step=151000
2025-12-06 21:13.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_151000.d3


Epoch 152/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.55it/s, critic_loss=270, actor_loss=-1.4, dc_loss=0.104]   


2025-12-06 21:14.12 [info     ] PRDC_20251206192337: epoch=152 step=152000 epoch=152 metrics={'time_sample_batch': 0.005071475267410279, 'time_algorithm_update': 0.03485167741775513, 'critic_loss': 271.8299968490601, 'actor_loss': -1.4012882813811303, 'dc_loss': 0.10346527540683746, 'time_step': 0.04018216753005981, 'td_error': 124.44482351162294, 'value_scale': 8.691932657763225, 'discounted_advantage': -58.642969951029656, 'initial_state': 1.6140501499176025, 'diff_eval': 40517.80244002996} step=152000
2025-12-06 21:14.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_152000.d3


Epoch 153/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.66it/s, critic_loss=237, actor_loss=-1.42, dc_loss=0.102]


2025-12-06 21:14.58 [info     ] PRDC_20251206192337: epoch=153 step=153000 epoch=153 metrics={'time_sample_batch': 0.005020482301712036, 'time_algorithm_update': 0.036400522708892824, 'critic_loss': 236.08066300201415, 'actor_loss': -1.4215158907175065, 'dc_loss': 0.10172136369347572, 'time_step': 0.04169023942947388, 'td_error': 120.53988887506794, 'value_scale': 8.04771654163577, 'discounted_advantage': -57.98691263242893, 'initial_state': -1.3579070568084717, 'diff_eval': 40252.63022617889} step=153000
2025-12-06 21:14.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_153000.d3


Epoch 154/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.06it/s, critic_loss=228, actor_loss=-1.44, dc_loss=0.101]


2025-12-06 21:15.41 [info     ] PRDC_20251206192337: epoch=154 step=154000 epoch=154 metrics={'time_sample_batch': 0.004958978891372681, 'time_algorithm_update': 0.03414529061317444, 'critic_loss': 226.74807737350463, 'actor_loss': -1.443807138800621, 'dc_loss': 0.10098286637663842, 'time_step': 0.03935976505279541, 'td_error': 125.68720034252867, 'value_scale': 10.279597438390976, 'discounted_advantage': -59.23548061452567, 'initial_state': 2.9768729209899902, 'diff_eval': 43739.01483965485} step=154000
2025-12-06 21:15.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_154000.d3


Epoch 155/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.82it/s, critic_loss=246, actor_loss=-1.53, dc_loss=0.101]


2025-12-06 21:16.25 [info     ] PRDC_20251206192337: epoch=155 step=155000 epoch=155 metrics={'time_sample_batch': 0.004914953470230103, 'time_algorithm_update': 0.034623611211776734, 'critic_loss': 245.19517756652832, 'actor_loss': -1.5343655880689622, 'dc_loss': 0.10081032012403011, 'time_step': 0.03978430604934692, 'td_error': 109.23315469527179, 'value_scale': 9.426333396715071, 'discounted_advantage': -55.00382215158045, 'initial_state': 2.1620731353759766, 'diff_eval': 40577.88077584668} step=155000
2025-12-06 21:16.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_155000.d3


Epoch 156/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.25it/s, critic_loss=246, actor_loss=-1.67, dc_loss=0.104]


2025-12-06 21:17.09 [info     ] PRDC_20251206192337: epoch=156 step=156000 epoch=156 metrics={'time_sample_batch': 0.006336918115615845, 'time_algorithm_update': 0.03410781931877136, 'critic_loss': 245.57651933670044, 'actor_loss': -1.6754969803094864, 'dc_loss': 0.10343219637870789, 'time_step': 0.0407120840549469, 'td_error': 100.93478823624642, 'value_scale': 10.724485920256587, 'discounted_advantage': -51.67083757171075, 'initial_state': 6.005329132080078, 'diff_eval': 39175.65512335797} step=156000
2025-12-06 21:17.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_156000.d3


Epoch 157/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.16it/s, critic_loss=257, actor_loss=-1.85, dc_loss=0.102]


2025-12-06 21:17.53 [info     ] PRDC_20251206192337: epoch=157 step=157000 epoch=157 metrics={'time_sample_batch': 0.004938951253890991, 'time_algorithm_update': 0.03402783131599426, 'critic_loss': 256.1094807319641, 'actor_loss': -1.846086878299713, 'dc_loss': 0.10225778837502003, 'time_step': 0.03922162485122681, 'td_error': 94.00245731101533, 'value_scale': 11.947843048318733, 'discounted_advantage': -56.47441216294908, 'initial_state': 4.842139720916748, 'diff_eval': 38412.654675149664} step=157000
2025-12-06 21:17.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_157000.d3


Epoch 158/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.96it/s, critic_loss=263, actor_loss=-1.92, dc_loss=0.102]


2025-12-06 21:18.36 [info     ] PRDC_20251206192337: epoch=158 step=158000 epoch=158 metrics={'time_sample_batch': 0.005077342033386231, 'time_algorithm_update': 0.03420107531547546, 'critic_loss': 260.9067570724487, 'actor_loss': -1.9199113881587981, 'dc_loss': 0.10166061721742153, 'time_step': 0.03952927541732788, 'td_error': 82.39383730136991, 'value_scale': 13.278134101469378, 'discounted_advantage': -53.98479793570535, 'initial_state': 6.989412784576416, 'diff_eval': 39438.78552009643} step=158000
2025-12-06 21:18.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_158000.d3


Epoch 159/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.37it/s, critic_loss=290, actor_loss=-1.99, dc_loss=0.104]


2025-12-06 21:19.19 [info     ] PRDC_20251206192337: epoch=159 step=159000 epoch=159 metrics={'time_sample_batch': 0.004917723178863525, 'time_algorithm_update': 0.03373243951797485, 'critic_loss': 294.1723600463867, 'actor_loss': -1.9928443629741668, 'dc_loss': 0.10437304411828519, 'time_step': 0.03890203166007995, 'td_error': 70.83876463190035, 'value_scale': 11.479015638138428, 'discounted_advantage': -47.127385887896004, 'initial_state': 5.1757941246032715, 'diff_eval': 35904.3501088288} step=159000
2025-12-06 21:19.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_159000.d3


Epoch 160/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.09it/s, critic_loss=276, actor_loss=-2, dc_loss=0.104]  


2025-12-06 21:20.02 [info     ] PRDC_20251206192337: epoch=160 step=160000 epoch=160 metrics={'time_sample_batch': 0.004992824792861938, 'time_algorithm_update': 0.0340616455078125, 'critic_loss': 276.00814660644534, 'actor_loss': -2.0046666243076325, 'dc_loss': 0.10419391849637032, 'time_step': 0.03931727695465088, 'td_error': 72.57960204604593, 'value_scale': 14.027522848967434, 'discounted_advantage': -48.94610521083717, 'initial_state': 9.899602890014648, 'diff_eval': 35292.21603103984} step=160000
2025-12-06 21:20.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_160000.d3


Epoch 161/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.82it/s, critic_loss=300, actor_loss=-1.99, dc_loss=0.104]


2025-12-06 21:20.46 [info     ] PRDC_20251206192337: epoch=161 step=161000 epoch=161 metrics={'time_sample_batch': 0.005147440910339356, 'time_algorithm_update': 0.034337688207626345, 'critic_loss': 297.877401309967, 'actor_loss': -1.9847781422138213, 'dc_loss': 0.10388570672273637, 'time_step': 0.03975809407234192, 'td_error': 64.68355715540667, 'value_scale': 13.65577810464391, 'discounted_advantage': -47.490846461929515, 'initial_state': 7.396152496337891, 'diff_eval': 39078.74099481628} step=161000
2025-12-06 21:20.46 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_161000.d3


Epoch 162/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.58it/s, critic_loss=280, actor_loss=-1.95, dc_loss=0.102]


2025-12-06 21:21.30 [info     ] PRDC_20251206192337: epoch=162 step=162000 epoch=162 metrics={'time_sample_batch': 0.005223094224929809, 'time_algorithm_update': 0.034617897510528566, 'critic_loss': 278.6469594306946, 'actor_loss': -1.9461025900840758, 'dc_loss': 0.10152527602016925, 'time_step': 0.04010784530639649, 'td_error': 68.99733214820108, 'value_scale': 13.242675546203854, 'discounted_advantage': -49.03860042941752, 'initial_state': 4.820277690887451, 'diff_eval': 35329.60087790732} step=162000
2025-12-06 21:21.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_162000.d3


Epoch 163/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.42it/s, critic_loss=268, actor_loss=-1.95, dc_loss=0.104]


2025-12-06 21:22.13 [info     ] PRDC_20251206192337: epoch=163 step=163000 epoch=163 metrics={'time_sample_batch': 0.0048964982032775875, 'time_algorithm_update': 0.03370944046974182, 'critic_loss': 279.64450009155274, 'actor_loss': -1.9484364833831787, 'dc_loss': 0.10373824241757393, 'time_step': 0.03885079741477966, 'td_error': 65.52860009497547, 'value_scale': 12.783110380797254, 'discounted_advantage': -50.51837393691646, 'initial_state': 4.8537068367004395, 'diff_eval': 38351.43157725259} step=163000
2025-12-06 21:22.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_163000.d3


Epoch 164/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.18it/s, critic_loss=232, actor_loss=-1.95, dc_loss=0.102]


2025-12-06 21:22.56 [info     ] PRDC_20251206192337: epoch=164 step=164000 epoch=164 metrics={'time_sample_batch': 0.0049027130603790285, 'time_algorithm_update': 0.03402050900459289, 'critic_loss': 231.0085791053772, 'actor_loss': -1.954991916179657, 'dc_loss': 0.10233162857592105, 'time_step': 0.039180357694625856, 'td_error': 67.69467312900377, 'value_scale': 12.513560643124535, 'discounted_advantage': -48.29514845393513, 'initial_state': 7.585482597351074, 'diff_eval': 37950.37154709627} step=164000
2025-12-06 21:22.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_164000.d3


Epoch 165/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.16it/s, critic_loss=324, actor_loss=-1.96, dc_loss=0.103]


2025-12-06 21:23.39 [info     ] PRDC_20251206192337: epoch=165 step=165000 epoch=165 metrics={'time_sample_batch': 0.004995543718338013, 'time_algorithm_update': 0.0339723014831543, 'critic_loss': 321.3608450164795, 'actor_loss': -1.9593060512542724, 'dc_loss': 0.10300919355452061, 'time_step': 0.03922288870811463, 'td_error': 59.43659331804827, 'value_scale': 11.39996926268533, 'discounted_advantage': -44.36923680045181, 'initial_state': 6.016641139984131, 'diff_eval': 37226.35586070427} step=165000
2025-12-06 21:23.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_165000.d3


Epoch 166/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.06it/s, critic_loss=323, actor_loss=-1.94, dc_loss=0.101]


2025-12-06 21:24.23 [info     ] PRDC_20251206192337: epoch=166 step=166000 epoch=166 metrics={'time_sample_batch': 0.004936204433441162, 'time_algorithm_update': 0.03417037796974182, 'critic_loss': 320.7394317779541, 'actor_loss': -1.936845925807953, 'dc_loss': 0.10035186323523522, 'time_step': 0.039370405673980716, 'td_error': 61.34677830991069, 'value_scale': 11.037841611690913, 'discounted_advantage': -42.47777336764099, 'initial_state': 7.539439678192139, 'diff_eval': 35948.59985482515} step=166000
2025-12-06 21:24.23 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_166000.d3


Epoch 167/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.21it/s, critic_loss=338, actor_loss=-1.91, dc_loss=0.0979]


2025-12-06 21:25.06 [info     ] PRDC_20251206192337: epoch=167 step=167000 epoch=167 metrics={'time_sample_batch': 0.004940916538238525, 'time_algorithm_update': 0.03398663473129272, 'critic_loss': 336.33140413284303, 'actor_loss': -1.9069923906326294, 'dc_loss': 0.09779853789508343, 'time_step': 0.03917344164848328, 'td_error': 55.77578200175201, 'value_scale': 10.079266690557755, 'discounted_advantage': -38.70042192937314, 'initial_state': 5.934158802032471, 'diff_eval': 36808.329951786465} step=167000
2025-12-06 21:25.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_167000.d3


Epoch 168/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.98it/s, critic_loss=358, actor_loss=-1.83, dc_loss=0.0946]


2025-12-06 21:25.50 [info     ] PRDC_20251206192337: epoch=168 step=168000 epoch=168 metrics={'time_sample_batch': 0.004975279331207275, 'time_algorithm_update': 0.03431166481971741, 'critic_loss': 355.0186162605286, 'actor_loss': -1.8338638026714325, 'dc_loss': 0.0946313615962863, 'time_step': 0.03953041291236877, 'td_error': 64.3780262450704, 'value_scale': 10.100772914048113, 'discounted_advantage': -41.686688463509284, 'initial_state': 5.489678382873535, 'diff_eval': 34791.518099580186} step=168000
2025-12-06 21:25.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_168000.d3


Epoch 169/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.99it/s, critic_loss=315, actor_loss=-1.82, dc_loss=0.0882]


2025-12-06 21:26.33 [info     ] PRDC_20251206192337: epoch=169 step=169000 epoch=169 metrics={'time_sample_batch': 0.0050643994808197024, 'time_algorithm_update': 0.034155388832092286, 'critic_loss': 313.1954905052185, 'actor_loss': -1.8161667059659958, 'dc_loss': 0.0882404298633337, 'time_step': 0.03947642850875854, 'td_error': 56.04734670166705, 'value_scale': 7.39472253772384, 'discounted_advantage': -36.431757040750234, 'initial_state': 2.753807783126831, 'diff_eval': 35873.02681483649} step=169000
2025-12-06 21:26.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_169000.d3


Epoch 170/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.05it/s, critic_loss=346, actor_loss=-1.77, dc_loss=0.081]


2025-12-06 21:27.17 [info     ] PRDC_20251206192337: epoch=170 step=170000 epoch=170 metrics={'time_sample_batch': 0.005041874885559082, 'time_algorithm_update': 0.03406749939918518, 'critic_loss': 343.98136423110964, 'actor_loss': -1.773117550969124, 'dc_loss': 0.08093652392923832, 'time_step': 0.039369796514511106, 'td_error': 56.38322948392457, 'value_scale': 7.514175532118722, 'discounted_advantage': -36.425577091973565, 'initial_state': 2.287205457687378, 'diff_eval': 33522.497910291895} step=170000
2025-12-06 21:27.17 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_170000.d3


Epoch 171/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.08it/s, critic_loss=302, actor_loss=-1.76, dc_loss=0.0795]


2025-12-06 21:28.02 [info     ] PRDC_20251206192337: epoch=171 step=171000 epoch=171 metrics={'time_sample_batch': 0.005002980947494507, 'time_algorithm_update': 0.03552012300491333, 'critic_loss': 303.23143788528444, 'actor_loss': -1.7620681864023209, 'dc_loss': 0.07953681256622076, 'time_step': 0.040790438652038574, 'td_error': 52.32580076586293, 'value_scale': 6.295684034437291, 'discounted_advantage': -35.347540503790974, 'initial_state': 1.2009350061416626, 'diff_eval': 32726.535221517235} step=171000
2025-12-06 21:28.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_171000.d3


Epoch 172/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.58it/s, critic_loss=368, actor_loss=-1.73, dc_loss=0.0769]


2025-12-06 21:28.46 [info     ] PRDC_20251206192337: epoch=172 step=172000 epoch=172 metrics={'time_sample_batch': 0.00494038724899292, 'time_algorithm_update': 0.03499562382698059, 'critic_loss': 365.04800747299197, 'actor_loss': -1.729487890124321, 'dc_loss': 0.07695175295323134, 'time_step': 0.04018433928489685, 'td_error': 58.148752951661635, 'value_scale': 6.083021983950831, 'discounted_advantage': -37.71251945515292, 'initial_state': -1.6399215459823608, 'diff_eval': 32744.54068994265} step=172000
2025-12-06 21:28.46 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_172000.d3


Epoch 173/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.63it/s, critic_loss=443, actor_loss=-1.66, dc_loss=0.0777]


2025-12-06 21:29.30 [info     ] PRDC_20251206192337: epoch=173 step=173000 epoch=173 metrics={'time_sample_batch': 0.005134805202484131, 'time_algorithm_update': 0.03466040921211243, 'critic_loss': 439.5730640258789, 'actor_loss': -1.6596989300251006, 'dc_loss': 0.07773486287891865, 'time_step': 0.040042876720428465, 'td_error': 51.83200130758811, 'value_scale': 5.102836806448887, 'discounted_advantage': -32.61716852588625, 'initial_state': -0.38273385167121887, 'diff_eval': 34025.50059532412} step=173000
2025-12-06 21:29.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_173000.d3


Epoch 174/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.96it/s, critic_loss=399, actor_loss=-1.65, dc_loss=0.0778]  


2025-12-06 21:30.13 [info     ] PRDC_20251206192337: epoch=174 step=174000 epoch=174 metrics={'time_sample_batch': 0.005026611804962158, 'time_algorithm_update': 0.03422953605651855, 'critic_loss': 395.91196559143066, 'actor_loss': -1.6482041933536529, 'dc_loss': 0.07768403927236796, 'time_step': 0.0395279061794281, 'td_error': 55.61078222425095, 'value_scale': 4.769501035460033, 'discounted_advantage': -31.139549050522415, 'initial_state': -1.169501543045044, 'diff_eval': 30446.545103112167} step=174000
2025-12-06 21:30.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_174000.d3


Epoch 175/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.17it/s, critic_loss=384, actor_loss=-1.54, dc_loss=0.0748]  


2025-12-06 21:30.57 [info     ] PRDC_20251206192337: epoch=175 step=175000 epoch=175 metrics={'time_sample_batch': 0.004975631475448609, 'time_algorithm_update': 0.0340017352104187, 'critic_loss': 381.1273080978394, 'actor_loss': -1.5355236697345973, 'dc_loss': 0.07479303221404553, 'time_step': 0.03922905969619751, 'td_error': 56.21474466502048, 'value_scale': 2.9042029828145437, 'discounted_advantage': -31.00090198767314, 'initial_state': -2.326519250869751, 'diff_eval': 33337.183736086066} step=175000
2025-12-06 21:30.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_175000.d3


Epoch 176/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.95it/s, critic_loss=431, actor_loss=-1.45, dc_loss=0.073]


2025-12-06 21:31.40 [info     ] PRDC_20251206192337: epoch=176 step=176000 epoch=176 metrics={'time_sample_batch': 0.005115416526794434, 'time_algorithm_update': 0.034172126054763795, 'critic_loss': 428.1180289993286, 'actor_loss': -1.4543056856691838, 'dc_loss': 0.0730764076411724, 'time_step': 0.03954502391815186, 'td_error': 55.39959441747261, 'value_scale': 2.088223679923302, 'discounted_advantage': -26.718473180672724, 'initial_state': -5.677974224090576, 'diff_eval': 32824.67373586764} step=176000
2025-12-06 21:31.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_176000.d3


Epoch 177/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.92it/s, critic_loss=313, actor_loss=-1.36, dc_loss=0.0729]


2025-12-06 21:32.24 [info     ] PRDC_20251206192337: epoch=177 step=177000 epoch=177 metrics={'time_sample_batch': 0.005160705327987671, 'time_algorithm_update': 0.03415684652328491, 'critic_loss': 311.00991223526, 'actor_loss': -1.3613731232434512, 'dc_loss': 0.07287106800079346, 'time_step': 0.03958741283416748, 'td_error': 56.52606521752258, 'value_scale': 0.6464092454030698, 'discounted_advantage': -29.67504710549986, 'initial_state': -6.884719371795654, 'diff_eval': 30261.0452532553} step=177000
2025-12-06 21:32.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_177000.d3


Epoch 178/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.34it/s, critic_loss=473, actor_loss=-1.36, dc_loss=0.0695]


2025-12-06 21:33.07 [info     ] PRDC_20251206192337: epoch=178 step=178000 epoch=178 metrics={'time_sample_batch': 0.004927310466766357, 'time_algorithm_update': 0.0338045608997345, 'critic_loss': 479.5520113296509, 'actor_loss': -1.3603003838807344, 'dc_loss': 0.06944428822398185, 'time_step': 0.03897773838043213, 'td_error': 53.28157224323257, 'value_scale': 0.36719424390895267, 'discounted_advantage': -27.53494859055685, 'initial_state': -8.352245330810547, 'diff_eval': 29424.988660814564} step=178000
2025-12-06 21:33.07 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_178000.d3


Epoch 179/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.19it/s, critic_loss=414, actor_loss=-1.37, dc_loss=0.0686]  


2025-12-06 21:33.51 [info     ] PRDC_20251206192337: epoch=179 step=179000 epoch=179 metrics={'time_sample_batch': 0.005336968660354614, 'time_algorithm_update': 0.03511939454078674, 'critic_loss': 416.36842981338503, 'actor_loss': -1.3699489893615246, 'dc_loss': 0.0685358037352562, 'time_step': 0.04072450494766235, 'td_error': 67.26142725558626, 'value_scale': 2.3319907963482, 'discounted_advantage': -32.02797625344114, 'initial_state': -5.506107807159424, 'diff_eval': 30531.57009307066} step=179000
2025-12-06 21:33.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_179000.d3


Epoch 180/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.98it/s, critic_loss=502, actor_loss=-1.35, dc_loss=0.0669]


2025-12-06 21:34.35 [info     ] PRDC_20251206192337: epoch=180 step=180000 epoch=180 metrics={'time_sample_batch': 0.005065779209136963, 'time_algorithm_update': 0.03417232465744018, 'critic_loss': 498.11638873672484, 'actor_loss': -1.3496364657282829, 'dc_loss': 0.06691189115494489, 'time_step': 0.039494993448257444, 'td_error': 63.39833242382346, 'value_scale': -0.050319149968435256, 'discounted_advantage': -30.091603424444912, 'initial_state': -10.513593673706055, 'diff_eval': 32142.592118890712} step=180000
2025-12-06 21:34.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_180000.d3


Epoch 181/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.27it/s, critic_loss=460, actor_loss=-1.22, dc_loss=0.065]


2025-12-06 21:35.18 [info     ] PRDC_20251206192337: epoch=181 step=181000 epoch=181 metrics={'time_sample_batch': 0.005022877216339111, 'time_algorithm_update': 0.03378062558174133, 'critic_loss': 456.62029458236697, 'actor_loss': -1.2225076503455639, 'dc_loss': 0.06495207053422927, 'time_step': 0.03905177307128906, 'td_error': 80.40846078639758, 'value_scale': -2.182505063122421, 'discounted_advantage': -26.96751851519458, 'initial_state': -11.406457901000977, 'diff_eval': 34793.41926443609} step=181000
2025-12-06 21:35.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_181000.d3


Epoch 182/200: 100%|██████████| 1000/1000 [00:42<00:00, 23.65it/s, critic_loss=485, actor_loss=-1.11, dc_loss=0.0674]


2025-12-06 21:36.04 [info     ] PRDC_20251206192337: epoch=182 step=182000 epoch=182 metrics={'time_sample_batch': 0.0050850071907043454, 'time_algorithm_update': 0.0363086416721344, 'critic_loss': 483.12355062484744, 'actor_loss': -1.112923754967749, 'dc_loss': 0.06743231831490994, 'time_step': 0.04167546010017395, 'td_error': 77.13115233792733, 'value_scale': -1.516019600947065, 'discounted_advantage': -27.885598145087, 'initial_state': -13.67138385772705, 'diff_eval': 31555.336736705907} step=182000
2025-12-06 21:36.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_182000.d3


Epoch 183/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.46it/s, critic_loss=477, actor_loss=-1.03, dc_loss=0.0656]


2025-12-06 21:36.48 [info     ] PRDC_20251206192337: epoch=183 step=183000 epoch=183 metrics={'time_sample_batch': 0.00511025071144104, 'time_algorithm_update': 0.03493543863296509, 'critic_loss': 475.5119572067261, 'actor_loss': -1.0276285487413406, 'dc_loss': 0.06562536514550447, 'time_step': 0.0402975161075592, 'td_error': 93.51436980597728, 'value_scale': -3.4022002755433998, 'discounted_advantage': -31.078300583039876, 'initial_state': -15.525472640991211, 'diff_eval': 33351.96880929498} step=183000
2025-12-06 21:36.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_183000.d3


Epoch 184/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.01it/s, critic_loss=495, actor_loss=-0.948, dc_loss=0.0652]


2025-12-06 21:37.31 [info     ] PRDC_20251206192337: epoch=184 step=184000 epoch=184 metrics={'time_sample_batch': 0.005089046955108642, 'time_algorithm_update': 0.03408476090431213, 'critic_loss': 506.81497274780276, 'actor_loss': -0.9502729649394751, 'dc_loss': 0.06520429647713899, 'time_step': 0.03943800950050354, 'td_error': 103.1372176488791, 'value_scale': -4.236037362126472, 'discounted_advantage': -33.018233439061916, 'initial_state': -13.644603729248047, 'diff_eval': 33962.15790511791} step=184000
2025-12-06 21:37.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_184000.d3


Epoch 185/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.95it/s, critic_loss=442, actor_loss=-0.892, dc_loss=0.0666]  


2025-12-06 21:38.15 [info     ] PRDC_20251206192337: epoch=185 step=185000 epoch=185 metrics={'time_sample_batch': 0.004986800193786621, 'time_algorithm_update': 0.03431439518928528, 'critic_loss': 438.6402156867981, 'actor_loss': -0.8925859345495701, 'dc_loss': 0.06655182103812694, 'time_step': 0.03955372023582458, 'td_error': 106.50560467196846, 'value_scale': -2.003302463529396, 'discounted_advantage': -35.24890530981343, 'initial_state': -12.800149917602539, 'diff_eval': 34039.35108648032} step=185000
2025-12-06 21:38.15 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_185000.d3


Epoch 186/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.35it/s, critic_loss=447, actor_loss=-0.84, dc_loss=0.0656]


2025-12-06 21:38.58 [info     ] PRDC_20251206192337: epoch=186 step=186000 epoch=186 metrics={'time_sample_batch': 0.00496451187133789, 'time_algorithm_update': 0.03371467733383179, 'critic_loss': 472.09233394241335, 'actor_loss': -0.84072745693475, 'dc_loss': 0.06559899089485408, 'time_step': 0.038940245628356934, 'td_error': 99.22700069291393, 'value_scale': -5.009497513190499, 'discounted_advantage': -27.632867441829397, 'initial_state': -16.942119598388672, 'diff_eval': 35175.98016267802} step=186000
2025-12-06 21:38.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_186000.d3


Epoch 187/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.04it/s, critic_loss=584, actor_loss=-0.873, dc_loss=0.0659]  


2025-12-06 21:39.41 [info     ] PRDC_20251206192337: epoch=187 step=187000 epoch=187 metrics={'time_sample_batch': 0.00516305685043335, 'time_algorithm_update': 0.033994389295578006, 'critic_loss': 579.3359192581177, 'actor_loss': -0.8714385801181197, 'dc_loss': 0.065945058144629, 'time_step': 0.0394115047454834, 'td_error': 125.28693058462031, 'value_scale': -2.5924245033534956, 'discounted_advantage': -38.1464477373852, 'initial_state': -14.738131523132324, 'diff_eval': 33578.88452930928} step=187000
2025-12-06 21:39.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_187000.d3


Epoch 188/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.07it/s, critic_loss=511, actor_loss=-0.888, dc_loss=0.0661]  


2025-12-06 21:40.24 [info     ] PRDC_20251206192337: epoch=188 step=188000 epoch=188 metrics={'time_sample_batch': 0.004955219507217407, 'time_algorithm_update': 0.034122996091842654, 'critic_loss': 509.4180816307068, 'actor_loss': -0.8893204285427928, 'dc_loss': 0.06613444117456675, 'time_step': 0.03934717273712158, 'td_error': 138.41267904442105, 'value_scale': -0.401396539299581, 'discounted_advantage': -47.72280796504898, 'initial_state': -10.88199234008789, 'diff_eval': 35509.20859316866} step=188000
2025-12-06 21:40.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_188000.d3


Epoch 189/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.84it/s, critic_loss=463, actor_loss=-0.803, dc_loss=0.0654]


2025-12-06 21:41.08 [info     ] PRDC_20251206192337: epoch=189 step=189000 epoch=189 metrics={'time_sample_batch': 0.005072880506515503, 'time_algorithm_update': 0.03434682559967041, 'critic_loss': 474.5466149864197, 'actor_loss': -0.8052498112693429, 'dc_loss': 0.06536731103807687, 'time_step': 0.039702967405319214, 'td_error': 134.78426873827948, 'value_scale': -1.2383909882339028, 'discounted_advantage': -38.8566047309486, 'initial_state': -11.75056266784668, 'diff_eval': 34508.286050408795} step=189000
2025-12-06 21:41.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_189000.d3


Epoch 190/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.08it/s, critic_loss=586, actor_loss=-0.846, dc_loss=0.0647]


2025-12-06 21:41.52 [info     ] PRDC_20251206192337: epoch=190 step=190000 epoch=190 metrics={'time_sample_batch': 0.005026711940765381, 'time_algorithm_update': 0.03403519058227539, 'critic_loss': 592.5344254608154, 'actor_loss': -0.8443206338472664, 'dc_loss': 0.06476054618507623, 'time_step': 0.039319376230239865, 'td_error': 125.55238368572893, 'value_scale': -2.390653127515556, 'discounted_advantage': -37.2084837314799, 'initial_state': -12.159043312072754, 'diff_eval': 34076.460056436896} step=190000
2025-12-06 21:41.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_190000.d3


Epoch 191/200: 100%|██████████| 1000/1000 [00:40<00:00, 24.96it/s, critic_loss=542, actor_loss=-0.844, dc_loss=0.0647]


2025-12-06 21:42.35 [info     ] PRDC_20251206192337: epoch=191 step=191000 epoch=191 metrics={'time_sample_batch': 0.005064419031143188, 'time_algorithm_update': 0.03420185947418213, 'critic_loss': 542.2598546485901, 'actor_loss': -0.8445809906572104, 'dc_loss': 0.06467242373526097, 'time_step': 0.03953437638282776, 'td_error': 135.18961840572987, 'value_scale': -1.9545525381277562, 'discounted_advantage': -39.03148779787982, 'initial_state': -12.202188491821289, 'diff_eval': 34632.79179527531} step=191000
2025-12-06 21:42.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_191000.d3


Epoch 192/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.38it/s, critic_loss=486, actor_loss=-0.866, dc_loss=0.0653]


2025-12-06 21:43.20 [info     ] PRDC_20251206192337: epoch=192 step=192000 epoch=192 metrics={'time_sample_batch': 0.005301204204559326, 'time_algorithm_update': 0.03483753967285156, 'critic_loss': 482.91780997467043, 'actor_loss': -0.8660257077887654, 'dc_loss': 0.06533187829703092, 'time_step': 0.04041082453727722, 'td_error': 146.3909689844554, 'value_scale': -1.5476620502813663, 'discounted_advantage': -42.32019338689876, 'initial_state': -13.889071464538574, 'diff_eval': 36297.66277394436} step=192000
2025-12-06 21:43.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_192000.d3


Epoch 193/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.13it/s, critic_loss=553, actor_loss=-0.899, dc_loss=0.065]


2025-12-06 21:44.03 [info     ] PRDC_20251206192337: epoch=193 step=193000 epoch=193 metrics={'time_sample_batch': 0.005014949798583984, 'time_algorithm_update': 0.033985961198806765, 'critic_loss': 548.8514839477539, 'actor_loss': -0.8968977670222521, 'dc_loss': 0.06501953893154859, 'time_step': 0.039256781339645386, 'td_error': 126.48611848527476, 'value_scale': -2.640805896249821, 'discounted_advantage': -30.810251513969096, 'initial_state': -15.102665901184082, 'diff_eval': 33045.22434973852} step=193000
2025-12-06 21:44.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_193000.d3


Epoch 194/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.18it/s, critic_loss=599, actor_loss=-0.881, dc_loss=0.0652]


2025-12-06 21:44.46 [info     ] PRDC_20251206192337: epoch=194 step=194000 epoch=194 metrics={'time_sample_batch': 0.005057837963104248, 'time_algorithm_update': 0.0338646354675293, 'critic_loss': 595.4532751808166, 'actor_loss': -0.8802485592290759, 'dc_loss': 0.06515428058058023, 'time_step': 0.03918242955207825, 'td_error': 133.08919411831846, 'value_scale': -3.014180018954535, 'discounted_advantage': -35.595661462550325, 'initial_state': -13.331494331359863, 'diff_eval': 36311.62487325353} step=194000
2025-12-06 21:44.46 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_194000.d3


Epoch 195/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.14it/s, critic_loss=536, actor_loss=-0.894, dc_loss=0.0645]


2025-12-06 21:45.29 [info     ] PRDC_20251206192337: epoch=195 step=195000 epoch=195 metrics={'time_sample_batch': 0.0049913120269775395, 'time_algorithm_update': 0.03399709129333496, 'critic_loss': 532.0508906173706, 'actor_loss': -0.8921236739680171, 'dc_loss': 0.06449612749367953, 'time_step': 0.039238978147506716, 'td_error': 141.69002769287914, 'value_scale': -3.301626825947174, 'discounted_advantage': -38.71596414456889, 'initial_state': -14.705292701721191, 'diff_eval': 36263.65355418266} step=195000
2025-12-06 21:45.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_195000.d3


Epoch 196/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.17it/s, critic_loss=556, actor_loss=-0.907, dc_loss=0.065]


2025-12-06 21:46.13 [info     ] PRDC_20251206192337: epoch=196 step=196000 epoch=196 metrics={'time_sample_batch': 0.004985345840454102, 'time_algorithm_update': 0.033966588497161866, 'critic_loss': 552.7799278335572, 'actor_loss': -0.9052071371749043, 'dc_loss': 0.06499356617033482, 'time_step': 0.039204308271408084, 'td_error': 150.69128297611488, 'value_scale': -2.9442258813455138, 'discounted_advantage': -40.605116593366326, 'initial_state': -16.524272918701172, 'diff_eval': 36255.31551472409} step=196000
2025-12-06 21:46.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_196000.d3


Epoch 197/200: 100%|██████████| 1000/1000 [00:39<00:00, 25.14it/s, critic_loss=623, actor_loss=-0.954, dc_loss=0.0643] 


2025-12-06 21:46.56 [info     ] PRDC_20251206192337: epoch=197 step=197000 epoch=197 metrics={'time_sample_batch': 0.005093574047088623, 'time_algorithm_update': 0.03392954015731812, 'critic_loss': 618.557187877655, 'actor_loss': -0.9537901436686516, 'dc_loss': 0.06430308697372675, 'time_step': 0.03927616453170776, 'td_error': 140.4339674759249, 'value_scale': -2.7141129877402665, 'discounted_advantage': -37.718640152096256, 'initial_state': -13.642786026000977, 'diff_eval': 34086.15578367478} step=197000
2025-12-06 21:46.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_197000.d3


Epoch 198/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.08it/s, critic_loss=614, actor_loss=-0.926, dc_loss=0.0652]  


2025-12-06 21:47.41 [info     ] PRDC_20251206192337: epoch=198 step=198000 epoch=198 metrics={'time_sample_batch': 0.00520703649520874, 'time_algorithm_update': 0.03550885248184204, 'critic_loss': 609.4564756698609, 'actor_loss': -0.9235938311442733, 'dc_loss': 0.06520896656811237, 'time_step': 0.040981499433517456, 'td_error': 147.7883334245835, 'value_scale': -1.6681495727432543, 'discounted_advantage': -38.97617409956645, 'initial_state': -12.598675727844238, 'diff_eval': 34427.99783833183} step=198000
2025-12-06 21:47.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_198000.d3


Epoch 199/200: 100%|██████████| 1000/1000 [00:41<00:00, 23.90it/s, critic_loss=590, actor_loss=-0.993, dc_loss=0.0653]


2025-12-06 21:48.26 [info     ] PRDC_20251206192337: epoch=199 step=199000 epoch=199 metrics={'time_sample_batch': 0.005231937646865845, 'time_algorithm_update': 0.035771803617477414, 'critic_loss': 585.9163156280517, 'actor_loss': -0.9915307677984238, 'dc_loss': 0.06534162347018718, 'time_step': 0.04127748703956604, 'td_error': 149.58199716802946, 'value_scale': 0.0690036681289445, 'discounted_advantage': -42.16303543563917, 'initial_state': -9.648175239562988, 'diff_eval': 35927.55213589262} step=199000
2025-12-06 21:48.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_199000.d3


Epoch 200/200: 100%|██████████| 1000/1000 [00:41<00:00, 24.32it/s, critic_loss=671, actor_loss=-1.02, dc_loss=0.0652]


2025-12-06 21:49.11 [info     ] PRDC_20251206192337: epoch=200 step=200000 epoch=200 metrics={'time_sample_batch': 0.005136298418045044, 'time_algorithm_update': 0.03513076114654541, 'critic_loss': 666.5370360527039, 'actor_loss': -1.0218595898896456, 'dc_loss': 0.06524312189221382, 'time_step': 0.040528868198394775, 'td_error': 153.55503572919932, 'value_scale': -2.0719554914381706, 'discounted_advantage': -37.477850618844, 'initial_state': -13.65442180633545, 'diff_eval': 37420.780734092645} step=200000
2025-12-06 21:49.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\PRDC_20251206192337\model_200000.d3
Training model:  ReBRAC
2025-12-06 21:49.11 [info     ] dataset info                   dataset_info=DatasetInfo(observation_signature=Signature(dtype=[dtype('float64')], shape=[(7,)]), action_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.CONTINUOUS: 1>, action_size

Epoch 1/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.54it/s, critic_loss=0.162, actor_loss=-0.923, bc_loss=1.9] 


2025-12-06 21:49.47 [info     ] ReBRAC_20251206214911: epoch=1 step=1000 epoch=1 metrics={'time_sample_batch': 0.01920321464538574, 'time_algorithm_update': 0.012748104572296142, 'critic_loss': 0.1624367468804121, 'actor_loss': -0.923372406244278, 'bc_loss': 1.9128235978633166, 'time_step': 0.032216844081878665, 'td_error': 0.9728807902738061, 'value_scale': 2.3741086711443504, 'discounted_advantage': -4.584976030352926, 'initial_state': 2.6587822437286377, 'diff_eval': 113463.06646298763} step=1000
2025-12-06 21:49.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_1000.d3


Epoch 2/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.32it/s, critic_loss=0.306, actor_loss=-0.967, bc_loss=2.97]


2025-12-06 21:50.24 [info     ] ReBRAC_20251206214911: epoch=2 step=2000 epoch=2 metrics={'time_sample_batch': 0.019375198125839235, 'time_algorithm_update': 0.012851714372634888, 'critic_loss': 0.30635353714227676, 'actor_loss': -0.9673564786911011, 'bc_loss': 2.9711349420547486, 'time_step': 0.03247509574890137, 'td_error': 1.0692369783983493, 'value_scale': 3.839083457726295, 'discounted_advantage': -6.274129289793755, 'initial_state': 4.114640712738037, 'diff_eval': 113472.30804847532} step=2000
2025-12-06 21:50.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_2000.d3


Epoch 3/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.82it/s, critic_loss=0.595, actor_loss=-0.975, bc_loss=2.97]


2025-12-06 21:51.00 [info     ] ReBRAC_20251206214911: epoch=3 step=3000 epoch=3 metrics={'time_sample_batch': 0.019037351369857788, 'time_algorithm_update': 0.012674047946929931, 'critic_loss': 0.5962043009996414, 'actor_loss': -0.974896089553833, 'bc_loss': 2.972677483558655, 'time_step': 0.031941141366958616, 'td_error': 1.50856006787048, 'value_scale': 5.259630086816916, 'discounted_advantage': -9.348217790079831, 'initial_state': 5.68433952331543, 'diff_eval': 113473.01487989018} step=3000
2025-12-06 21:51.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_3000.d3


Epoch 4/200: 100%|██████████| 1000/1000 [00:32<00:00, 31.13it/s, critic_loss=0.958, actor_loss=-0.98, bc_loss=2.97]


2025-12-06 21:51.35 [info     ] ReBRAC_20251206214911: epoch=4 step=4000 epoch=4 metrics={'time_sample_batch': 0.018763252735137938, 'time_algorithm_update': 0.012594080924987794, 'critic_loss': 0.9591249558329582, 'actor_loss': -0.9804122567176818, 'bc_loss': 2.9710462441444396, 'time_step': 0.03161451768875122, 'td_error': 2.0336156250423314, 'value_scale': 6.610241665942821, 'discounted_advantage': -11.707339959384642, 'initial_state': 7.589476108551025, 'diff_eval': 113473.28047716088} step=4000
2025-12-06 21:51.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_4000.d3


Epoch 5/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.46it/s, critic_loss=1.4, actor_loss=-0.984, bc_loss=2.97]


2025-12-06 21:52.11 [info     ] ReBRAC_20251206214911: epoch=5 step=5000 epoch=5 metrics={'time_sample_batch': 0.019361570596694945, 'time_algorithm_update': 0.012673669099807739, 'critic_loss': 1.40537132447958, 'actor_loss': -0.9835653589963913, 'bc_loss': 2.973288053512573, 'time_step': 0.03229996800422669, 'td_error': 1.6400877249380308, 'value_scale': 7.385573822227427, 'discounted_advantage': -10.757614451211904, 'initial_state': 9.33776569366455, 'diff_eval': 113473.04639858812} step=5000
2025-12-06 21:52.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_5000.d3


Epoch 6/200: 100%|██████████| 1000/1000 [00:33<00:00, 30.07it/s, critic_loss=2.01, actor_loss=-0.984, bc_loss=2.97]


2025-12-06 21:52.48 [info     ] ReBRAC_20251206214911: epoch=6 step=6000 epoch=6 metrics={'time_sample_batch': 0.019385388374328613, 'time_algorithm_update': 0.013055558443069458, 'critic_loss': 2.0102591146826745, 'actor_loss': -0.9844250519275666, 'bc_loss': 2.9731115231513976, 'time_step': 0.032709768772125246, 'td_error': 2.7181133464125478, 'value_scale': 8.815513722198856, 'discounted_advantage': -14.561015112354305, 'initial_state': 10.747509956359863, 'diff_eval': 113473.60019097404} step=6000
2025-12-06 21:52.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_6000.d3


Epoch 7/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.52it/s, critic_loss=2.77, actor_loss=-0.986, bc_loss=2.97]


2025-12-06 21:53.24 [info     ] ReBRAC_20251206214911: epoch=7 step=7000 epoch=7 metrics={'time_sample_batch': 0.019049698114395143, 'time_algorithm_update': 0.012882280111312866, 'critic_loss': 2.775307688832283, 'actor_loss': -0.9864936910867691, 'bc_loss': 2.973066927909851, 'time_step': 0.032212409019470215, 'td_error': 6.150218934348369, 'value_scale': 11.347906909133593, 'discounted_advantage': -23.244672559683163, 'initial_state': 13.006012916564941, 'diff_eval': 113474.14052124535} step=7000
2025-12-06 21:53.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_7000.d3


Epoch 8/200: 100%|██████████| 1000/1000 [00:33<00:00, 30.00it/s, critic_loss=4.07, actor_loss=-0.989, bc_loss=2.97]


2025-12-06 21:54.01 [info     ] ReBRAC_20251206214911: epoch=8 step=8000 epoch=8 metrics={'time_sample_batch': 0.01957787775993347, 'time_algorithm_update': 0.01299016809463501, 'critic_loss': 4.07121355164051, 'actor_loss': -0.9889064103364944, 'bc_loss': 2.9714151344299315, 'time_step': 0.03281134057044983, 'td_error': 9.278293958833787, 'value_scale': 14.349960979308436, 'discounted_advantage': -31.24592728400243, 'initial_state': 15.36266803741455, 'diff_eval': 113474.24650590357} step=8000
2025-12-06 21:54.01 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_8000.d3


Epoch 9/200: 100%|██████████| 1000/1000 [00:33<00:00, 30.14it/s, critic_loss=5.47, actor_loss=-0.989, bc_loss=2.97]


2025-12-06 21:54.38 [info     ] ReBRAC_20251206214911: epoch=9 step=9000 epoch=9 metrics={'time_sample_batch': 0.019441206693649293, 'time_algorithm_update': 0.01295551609992981, 'critic_loss': 5.4848183240890505, 'actor_loss': -0.9892405072450637, 'bc_loss': 2.9728986802101134, 'time_step': 0.032658190965652464, 'td_error': 15.415857467683828, 'value_scale': 18.748326094619685, 'discounted_advantage': -41.97317957871436, 'initial_state': 18.44838523864746, 'diff_eval': 113474.25535308506} step=9000
2025-12-06 21:54.38 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_9000.d3


Epoch 10/200: 100%|██████████| 1000/1000 [00:33<00:00, 30.19it/s, critic_loss=8.14, actor_loss=-0.988, bc_loss=2.97]


2025-12-06 21:55.14 [info     ] ReBRAC_20251206214911: epoch=10 step=10000 epoch=10 metrics={'time_sample_batch': 0.019481611728668212, 'time_algorithm_update': 0.012875094652175904, 'critic_loss': 8.149835681200027, 'actor_loss': -0.9883980368375778, 'bc_loss': 2.97273220539093, 'time_step': 0.032608745336532595, 'td_error': 18.62399829124014, 'value_scale': 22.813463097421042, 'discounted_advantage': -49.47496750360499, 'initial_state': 18.923837661743164, 'diff_eval': 113474.27278427545} step=10000
2025-12-06 21:55.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_10000.d3


Epoch 11/200: 100%|██████████| 1000/1000 [00:33<00:00, 30.18it/s, critic_loss=12, actor_loss=-0.988, bc_loss=2.97] 


2025-12-06 21:55.51 [info     ] ReBRAC_20251206214911: epoch=11 step=11000 epoch=11 metrics={'time_sample_batch': 0.01949778127670288, 'time_algorithm_update': 0.012894489526748657, 'critic_loss': 11.993547080993652, 'actor_loss': -0.988001123547554, 'bc_loss': 2.974436463356018, 'time_step': 0.03265222334861755, 'td_error': 36.034371003041834, 'value_scale': 29.921734806203283, 'discounted_advantage': -68.76812072898693, 'initial_state': 23.212783813476562, 'diff_eval': 113474.28462417488} step=11000
2025-12-06 21:55.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_11000.d3


Epoch 12/200: 100%|██████████| 1000/1000 [00:33<00:00, 29.76it/s, critic_loss=16.7, actor_loss=-0.989, bc_loss=2.97]


2025-12-06 21:56.28 [info     ] ReBRAC_20251206214911: epoch=12 step=12000 epoch=12 metrics={'time_sample_batch': 0.019829061031341553, 'time_algorithm_update': 0.01294491481781006, 'critic_loss': 16.75939709997177, 'actor_loss': -0.9888815743923187, 'bc_loss': 2.974102741241455, 'time_step': 0.03304744648933411, 'td_error': 39.353756193092146, 'value_scale': 36.491631319342, 'discounted_advantage': -76.50536405782985, 'initial_state': 24.1173152923584, 'diff_eval': 113474.29300394881} step=12000
2025-12-06 21:56.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_12000.d3


Epoch 13/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.60it/s, critic_loss=21.5, actor_loss=-0.991, bc_loss=2.97]


2025-12-06 21:57.04 [info     ] ReBRAC_20251206214911: epoch=13 step=13000 epoch=13 metrics={'time_sample_batch': 0.019145466327667237, 'time_algorithm_update': 0.012681724309921264, 'critic_loss': 21.546595219612122, 'actor_loss': -0.9910338507890701, 'bc_loss': 2.9724444456100465, 'time_step': 0.032124805450439456, 'td_error': 57.039473919022186, 'value_scale': 46.04933979445311, 'discounted_advantage': -92.65254689603317, 'initial_state': 30.749692916870117, 'diff_eval': 113474.29827967833} step=13000
2025-12-06 21:57.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_13000.d3


Epoch 14/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.92it/s, critic_loss=27.2, actor_loss=-0.993, bc_loss=2.97]


2025-12-06 21:57.40 [info     ] ReBRAC_20251206214911: epoch=14 step=14000 epoch=14 metrics={'time_sample_batch': 0.018943717479705812, 'time_algorithm_update': 0.012626365184783935, 'critic_loss': 27.25510091972351, 'actor_loss': -0.9934705792665481, 'bc_loss': 2.9724021010398864, 'time_step': 0.03184013843536377, 'td_error': 62.386590679043955, 'value_scale': 56.27968177751443, 'discounted_advantage': -103.17993911949401, 'initial_state': 36.101863861083984, 'diff_eval': 113474.30012147684} step=14000
2025-12-06 21:57.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_14000.d3


Epoch 15/200: 100%|██████████| 1000/1000 [00:33<00:00, 30.23it/s, critic_loss=34.8, actor_loss=-0.995, bc_loss=2.97]


2025-12-06 21:58.17 [info     ] ReBRAC_20251206214911: epoch=15 step=15000 epoch=15 metrics={'time_sample_batch': 0.01948826789855957, 'time_algorithm_update': 0.012842223405838012, 'critic_loss': 34.79060137939453, 'actor_loss': -0.9951463981866836, 'bc_loss': 2.9733910994529724, 'time_step': 0.032578823566436765, 'td_error': 89.25413553103752, 'value_scale': 69.89439514616609, 'discounted_advantage': -124.22934690752882, 'initial_state': 45.781471252441406, 'diff_eval': 113474.30334830783} step=15000
2025-12-06 21:58.17 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_15000.d3


Epoch 16/200: 100%|██████████| 1000/1000 [00:32<00:00, 31.02it/s, critic_loss=40, actor_loss=-0.996, bc_loss=2.97] 


2025-12-06 21:58.52 [info     ] ReBRAC_20251206214911: epoch=16 step=16000 epoch=16 metrics={'time_sample_batch': 0.01896526885032654, 'time_algorithm_update': 0.012552846193313599, 'critic_loss': 40.01045249938965, 'actor_loss': -0.996183886885643, 'bc_loss': 2.9719940285682678, 'time_step': 0.03175677371025085, 'td_error': 87.67774863396353, 'value_scale': 81.15251005155542, 'discounted_advantage': -135.9343668597216, 'initial_state': 52.069549560546875, 'diff_eval': 113474.30473825178} step=16000
2025-12-06 21:58.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_16000.d3


Epoch 17/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.81it/s, critic_loss=45.4, actor_loss=-0.997, bc_loss=2.97]


2025-12-06 21:59.28 [info     ] ReBRAC_20251206214911: epoch=17 step=17000 epoch=17 metrics={'time_sample_batch': 0.018997231006622314, 'time_algorithm_update': 0.012648730516433716, 'critic_loss': 45.47063431930542, 'actor_loss': -0.9965264648199081, 'bc_loss': 2.972606246471405, 'time_step': 0.03192213249206543, 'td_error': 128.65718005515114, 'value_scale': 97.69716967735987, 'discounted_advantage': -162.2655377468521, 'initial_state': 63.6355094909668, 'diff_eval': 113474.30356932366} step=17000
2025-12-06 21:59.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_17000.d3


Epoch 18/200: 100%|██████████| 1000/1000 [00:33<00:00, 29.88it/s, critic_loss=55.7, actor_loss=-0.997, bc_loss=2.97]


2025-12-06 22:00.05 [info     ] ReBRAC_20251206214911: epoch=18 step=18000 epoch=18 metrics={'time_sample_batch': 0.019690558671951294, 'time_algorithm_update': 0.013014776706695557, 'critic_loss': 55.74320079612732, 'actor_loss': -0.9965282039642334, 'bc_loss': 2.9726461429595945, 'time_step': 0.03295605373382569, 'td_error': 123.3908400324239, 'value_scale': 111.63329786327039, 'discounted_advantage': -176.85818663624792, 'initial_state': 74.6806869506836, 'diff_eval': 113474.3006879322} step=18000
2025-12-06 22:00.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_18000.d3


Epoch 19/200: 100%|██████████| 1000/1000 [00:33<00:00, 29.53it/s, critic_loss=69.2, actor_loss=-0.996, bc_loss=2.97]


2025-12-06 22:00.42 [info     ] ReBRAC_20251206214911: epoch=19 step=19000 epoch=19 metrics={'time_sample_batch': 0.019833569288253784, 'time_algorithm_update': 0.013244950771331787, 'critic_loss': 69.17648342514038, 'actor_loss': -0.9963759279251099, 'bc_loss': 2.9728212146759034, 'time_step': 0.033344666719436644, 'td_error': 196.76306494515333, 'value_scale': 130.27444364658845, 'discounted_advantage': -218.30418352125884, 'initial_state': 89.0909652709961, 'diff_eval': 113474.30349237741} step=19000
2025-12-06 22:00.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_19000.d3


Epoch 20/200: 100%|██████████| 1000/1000 [00:33<00:00, 30.23it/s, critic_loss=90.5, actor_loss=-0.996, bc_loss=2.97]


2025-12-06 22:01.19 [info     ] ReBRAC_20251206214911: epoch=20 step=20000 epoch=20 metrics={'time_sample_batch': 0.019327398538589478, 'time_algorithm_update': 0.012986823081970215, 'critic_loss': 90.54649303817749, 'actor_loss': -0.9959873350858688, 'bc_loss': 2.9708527159690856, 'time_step': 0.03257574224472046, 'td_error': 252.4451009639269, 'value_scale': 151.10280256719022, 'discounted_advantage': -251.7353357866802, 'initial_state': 102.0468521118164, 'diff_eval': 113474.30802729464} step=20000
2025-12-06 22:01.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_20000.d3


Epoch 21/200: 100%|██████████| 1000/1000 [00:32<00:00, 31.01it/s, critic_loss=110, actor_loss=-0.996, bc_loss=2.97]


2025-12-06 22:01.55 [info     ] ReBRAC_20251206214911: epoch=21 step=21000 epoch=21 metrics={'time_sample_batch': 0.018942253828048705, 'time_algorithm_update': 0.012547930240631104, 'critic_loss': 110.46070904159546, 'actor_loss': -0.9960586082935333, 'bc_loss': 2.974481173992157, 'time_step': 0.03174904346466065, 'td_error': 317.5292349587264, 'value_scale': 174.3490791580603, 'discounted_advantage': -288.88376347343757, 'initial_state': 114.2567367553711, 'diff_eval': 113474.30846605197} step=21000
2025-12-06 22:01.55 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_21000.d3


Epoch 22/200: 100%|██████████| 1000/1000 [00:33<00:00, 30.19it/s, critic_loss=133, actor_loss=-0.996, bc_loss=2.97]


2025-12-06 22:02.31 [info     ] ReBRAC_20251206214911: epoch=22 step=22000 epoch=22 metrics={'time_sample_batch': 0.01948848271369934, 'time_algorithm_update': 0.01288111162185669, 'critic_loss': 133.39009629821777, 'actor_loss': -0.9959886509180069, 'bc_loss': 2.9705965609550478, 'time_step': 0.03261446237564087, 'td_error': 356.9578567761179, 'value_scale': 198.97401322443147, 'discounted_advantage': -326.41609524667734, 'initial_state': 130.7124481201172, 'diff_eval': 113474.30832853101} step=22000
2025-12-06 22:02.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_22000.d3


Epoch 23/200: 100%|██████████| 1000/1000 [00:33<00:00, 30.29it/s, critic_loss=160, actor_loss=-0.996, bc_loss=2.97]


2025-12-06 22:03.08 [info     ] ReBRAC_20251206214911: epoch=23 step=23000 epoch=23 metrics={'time_sample_batch': 0.01945770525932312, 'time_algorithm_update': 0.012781986951828002, 'critic_loss': 160.13281407928466, 'actor_loss': -0.9958817232847214, 'bc_loss': 2.9724275274276732, 'time_step': 0.03250832891464234, 'td_error': 601.1741926553234, 'value_scale': 230.51737110672732, 'discounted_advantage': -383.80300450988483, 'initial_state': 151.98056030273438, 'diff_eval': 113474.30834490256} step=23000
2025-12-06 22:03.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_23000.d3


Epoch 24/200: 100%|██████████| 1000/1000 [00:33<00:00, 30.27it/s, critic_loss=182, actor_loss=-0.996, bc_loss=2.97]


2025-12-06 22:03.44 [info     ] ReBRAC_20251206214911: epoch=24 step=24000 epoch=24 metrics={'time_sample_batch': 0.01943825888633728, 'time_algorithm_update': 0.01284476375579834, 'critic_loss': 181.7090721435547, 'actor_loss': -0.9956936198472977, 'bc_loss': 2.970045219898224, 'time_step': 0.032534568309783934, 'td_error': 492.38093069137904, 'value_scale': 255.75549654668853, 'discounted_advantage': -387.61589249028975, 'initial_state': 168.65292358398438, 'diff_eval': 113474.30777353574} step=24000
2025-12-06 22:03.44 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_24000.d3


Epoch 25/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.59it/s, critic_loss=211, actor_loss=-0.995, bc_loss=2.97]


2025-12-06 22:04.20 [info     ] ReBRAC_20251206214911: epoch=25 step=25000 epoch=25 metrics={'time_sample_batch': 0.019238625526428223, 'time_algorithm_update': 0.012657589673995972, 'critic_loss': 211.30254530334471, 'actor_loss': -0.9953846256732941, 'bc_loss': 2.9731558194160463, 'time_step': 0.03217344856262207, 'td_error': 485.54613380325543, 'value_scale': 279.44629354189146, 'discounted_advantage': -409.128051501586, 'initial_state': 189.08029174804688, 'diff_eval': 113474.30589244551} step=25000
2025-12-06 22:04.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_25000.d3


Epoch 26/200: 100%|██████████| 1000/1000 [00:35<00:00, 28.43it/s, critic_loss=227, actor_loss=-0.994, bc_loss=2.97]


2025-12-06 22:04.59 [info     ] ReBRAC_20251206214911: epoch=26 step=26000 epoch=26 metrics={'time_sample_batch': 0.020262749671936035, 'time_algorithm_update': 0.014118430614471435, 'critic_loss': 227.04963638305665, 'actor_loss': -0.9943869416713714, 'bc_loss': 2.9734542360305785, 'time_step': 0.03464089560508728, 'td_error': 544.1048884859523, 'value_scale': 301.10827522285825, 'discounted_advantage': -422.6048705434466, 'initial_state': 201.22390747070312, 'diff_eval': 113474.30832361955} step=26000
2025-12-06 22:04.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_26000.d3


Epoch 27/200: 100%|██████████| 1000/1000 [00:34<00:00, 29.34it/s, critic_loss=263, actor_loss=-0.992, bc_loss=2.97]


2025-12-06 22:05.37 [info     ] ReBRAC_20251206214911: epoch=27 step=27000 epoch=27 metrics={'time_sample_batch': 0.01994165325164795, 'time_algorithm_update': 0.013262450695037842, 'critic_loss': 263.2159223022461, 'actor_loss': -0.9916105649471283, 'bc_loss': 2.972582384109497, 'time_step': 0.03349533581733704, 'td_error': 712.553479205834, 'value_scale': 327.6074581535944, 'discounted_advantage': -478.3568591656847, 'initial_state': 226.27371215820312, 'diff_eval': 113474.30845950336} step=27000
2025-12-06 22:05.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_27000.d3


Epoch 28/200: 100%|██████████| 1000/1000 [00:33<00:00, 30.09it/s, critic_loss=293, actor_loss=-0.987, bc_loss=2.97]


2025-12-06 22:06.13 [info     ] ReBRAC_20251206214911: epoch=28 step=28000 epoch=28 metrics={'time_sample_batch': 0.019160603284835816, 'time_algorithm_update': 0.013254919052124024, 'critic_loss': 293.2664596557617, 'actor_loss': -0.9872715085744858, 'bc_loss': 2.970544255256653, 'time_step': 0.032689948320388795, 'td_error': 712.1041620186579, 'value_scale': 344.07280409700815, 'discounted_advantage': -506.52809494038013, 'initial_state': 232.09481811523438, 'diff_eval': 113474.30839401718} step=28000
2025-12-06 22:06.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_28000.d3


Epoch 29/200: 100%|██████████| 1000/1000 [00:33<00:00, 30.12it/s, critic_loss=310, actor_loss=-0.979, bc_loss=2.97]


2025-12-06 22:06.50 [info     ] ReBRAC_20251206214911: epoch=29 step=29000 epoch=29 metrics={'time_sample_batch': 0.01960856556892395, 'time_algorithm_update': 0.012797619819641113, 'critic_loss': 309.8016130981445, 'actor_loss': -0.9785933638811112, 'bc_loss': 2.9730918531417845, 'time_step': 0.03267033076286316, 'td_error': 838.9714674819925, 'value_scale': 366.5453504788606, 'discounted_advantage': -528.6298042913457, 'initial_state': 254.7543182373047, 'diff_eval': 113474.3084398575} step=29000
2025-12-06 22:06.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_29000.d3


Epoch 30/200: 100%|██████████| 1000/1000 [00:33<00:00, 30.08it/s, critic_loss=338, actor_loss=-0.962, bc_loss=2.97]


2025-12-06 22:07.27 [info     ] ReBRAC_20251206214911: epoch=30 step=30000 epoch=30 metrics={'time_sample_batch': 0.019847003698349, 'time_algorithm_update': 0.012573281526565552, 'critic_loss': 337.6943600769043, 'actor_loss': -0.9622506382465362, 'bc_loss': 2.9702236952781678, 'time_step': 0.03268706464767456, 'td_error': 1060.2872038944256, 'value_scale': 385.23220122869736, 'discounted_advantage': -583.6232330070965, 'initial_state': 270.90789794921875, 'diff_eval': 113474.30846605197} step=30000
2025-12-06 22:07.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_30000.d3


Epoch 31/200: 100%|██████████| 1000/1000 [00:33<00:00, 30.19it/s, critic_loss=370, actor_loss=-0.943, bc_loss=2.97]


2025-12-06 22:08.03 [info     ] ReBRAC_20251206214911: epoch=31 step=31000 epoch=31 metrics={'time_sample_batch': 0.019470159769058228, 'time_algorithm_update': 0.012906818389892579, 'critic_loss': 369.7606228179932, 'actor_loss': -0.943003858089447, 'bc_loss': 2.97266077709198, 'time_step': 0.032632226705551146, 'td_error': 1070.8080036016474, 'value_scale': 403.3833827097078, 'discounted_advantage': -611.9053630962312, 'initial_state': 273.6826477050781, 'diff_eval': 113474.30846605197} step=31000
2025-12-06 22:08.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_31000.d3


Epoch 32/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.79it/s, critic_loss=411, actor_loss=-0.921, bc_loss=2.97]


2025-12-06 22:08.39 [info     ] ReBRAC_20251206214911: epoch=32 step=32000 epoch=32 metrics={'time_sample_batch': 0.019006147623062134, 'time_algorithm_update': 0.012726971626281738, 'critic_loss': 411.84542308044433, 'actor_loss': -0.9207200826406479, 'bc_loss': 2.973616186618805, 'time_step': 0.03198848223686218, 'td_error': 1251.964026192478, 'value_scale': 416.45871543214787, 'discounted_advantage': -632.4377522622641, 'initial_state': 294.23162841796875, 'diff_eval': 113474.30846605197} step=32000
2025-12-06 22:08.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_32000.d3


Epoch 33/200: 100%|██████████| 1000/1000 [00:33<00:00, 30.04it/s, critic_loss=473, actor_loss=-0.872, bc_loss=2.97]


2025-12-06 22:09.16 [info     ] ReBRAC_20251206214911: epoch=33 step=33000 epoch=33 metrics={'time_sample_batch': 0.019550466299057007, 'time_algorithm_update': 0.012935898065567017, 'critic_loss': 473.487674621582, 'actor_loss': -0.8714549171924592, 'bc_loss': 2.9740772581100465, 'time_step': 0.032752180099487306, 'td_error': 1204.610820914954, 'value_scale': 418.10162207027093, 'discounted_advantage': -622.067005916687, 'initial_state': 308.1558532714844, 'diff_eval': 113474.30846605197} step=33000
2025-12-06 22:09.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_33000.d3


Epoch 34/200: 100%|██████████| 1000/1000 [00:33<00:00, 30.04it/s, critic_loss=574, actor_loss=-0.814, bc_loss=2.97]


2025-12-06 22:09.53 [info     ] ReBRAC_20251206214911: epoch=34 step=34000 epoch=34 metrics={'time_sample_batch': 0.01967001795768738, 'time_algorithm_update': 0.012871401548385621, 'critic_loss': 574.6036848144531, 'actor_loss': -0.813918186545372, 'bc_loss': 2.973594466209412, 'time_step': 0.03279911398887634, 'td_error': 1436.4272839583136, 'value_scale': 417.0898178203308, 'discounted_advantage': -636.3451778891294, 'initial_state': 305.49664306640625, 'diff_eval': 113474.30846605197} step=34000
2025-12-06 22:09.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_34000.d3


Epoch 35/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.52it/s, critic_loss=667, actor_loss=-0.757, bc_loss=2.97]


2025-12-06 22:10.29 [info     ] ReBRAC_20251206214911: epoch=35 step=35000 epoch=35 metrics={'time_sample_batch': 0.019238723278045655, 'time_algorithm_update': 0.012770435333251954, 'critic_loss': 667.7410408325195, 'actor_loss': -0.7569893069267273, 'bc_loss': 2.973854335308075, 'time_step': 0.03225682425498962, 'td_error': 1475.342106721906, 'value_scale': 412.312662021812, 'discounted_advantage': -626.0793294093986, 'initial_state': 310.29925537109375, 'diff_eval': 113474.30846605197} step=35000
2025-12-06 22:10.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_35000.d3


Epoch 36/200: 100%|██████████| 1000/1000 [00:33<00:00, 29.80it/s, critic_loss=725, actor_loss=-0.7, bc_loss=2.97] 


2025-12-06 22:11.06 [info     ] ReBRAC_20251206214911: epoch=36 step=36000 epoch=36 metrics={'time_sample_batch': 0.019798169851303102, 'time_algorithm_update': 0.012995285511016846, 'critic_loss': 725.0491523742676, 'actor_loss': -0.6993421745300293, 'bc_loss': 2.9745041069984435, 'time_step': 0.03303935623168945, 'td_error': 1661.9463499406952, 'value_scale': 408.4075137112986, 'discounted_advantage': -629.2944250107865, 'initial_state': 309.95672607421875, 'diff_eval': 113474.30846605197} step=36000
2025-12-06 22:11.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_36000.d3


Epoch 37/200: 100%|██████████| 1000/1000 [00:33<00:00, 30.20it/s, critic_loss=741, actor_loss=-0.643, bc_loss=2.97]


2025-12-06 22:11.42 [info     ] ReBRAC_20251206214911: epoch=37 step=37000 epoch=37 metrics={'time_sample_batch': 0.019512666702270507, 'time_algorithm_update': 0.012834321737289428, 'critic_loss': 740.1119409484863, 'actor_loss': -0.6427018251419068, 'bc_loss': 2.972758987903595, 'time_step': 0.03260638165473938, 'td_error': 1339.5892112059, 'value_scale': 397.4025370022477, 'discounted_advantage': -585.0176931072466, 'initial_state': 283.2903137207031, 'diff_eval': 113474.30846605197} step=37000
2025-12-06 22:11.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_37000.d3


Epoch 38/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.74it/s, critic_loss=752, actor_loss=-0.593, bc_loss=2.97]


2025-12-06 22:12.18 [info     ] ReBRAC_20251206214911: epoch=38 step=38000 epoch=38 metrics={'time_sample_batch': 0.019134689331054687, 'time_algorithm_update': 0.012655761957168579, 'critic_loss': 752.1965153808594, 'actor_loss': -0.5932198822498321, 'bc_loss': 2.9721384596824647, 'time_step': 0.032041437864303586, 'td_error': 1286.5085961432949, 'value_scale': 393.2221923837218, 'discounted_advantage': -555.2542754829985, 'initial_state': 275.35015869140625, 'diff_eval': 113474.30846605197} step=38000
2025-12-06 22:12.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_38000.d3


Epoch 39/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.47it/s, critic_loss=787, actor_loss=-0.565, bc_loss=2.97]


2025-12-06 22:12.54 [info     ] ReBRAC_20251206214911: epoch=39 step=39000 epoch=39 metrics={'time_sample_batch': 0.01936531162261963, 'time_algorithm_update': 0.012699659585952759, 'critic_loss': 788.1467100524902, 'actor_loss': -0.5653792259097099, 'bc_loss': 2.971709156036377, 'time_step': 0.03232347822189331, 'td_error': 2139.0349767756297, 'value_scale': 414.15559816120657, 'discounted_advantage': -691.6070974993748, 'initial_state': 269.5617980957031, 'diff_eval': 113474.30846605197} step=39000
2025-12-06 22:12.55 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_39000.d3


Epoch 40/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.55it/s, critic_loss=802, actor_loss=-0.573, bc_loss=2.97]


2025-12-06 22:13.31 [info     ] ReBRAC_20251206214911: epoch=40 step=40000 epoch=40 metrics={'time_sample_batch': 0.019258482694625854, 'time_algorithm_update': 0.012749211311340332, 'critic_loss': 803.1613887939453, 'actor_loss': -0.5730363000631332, 'bc_loss': 2.9746100678443907, 'time_step': 0.03225622606277466, 'td_error': 1726.1191407123324, 'value_scale': 427.708091776409, 'discounted_advantage': -633.9006380651933, 'initial_state': 268.6797180175781, 'diff_eval': 113474.30846605197} step=40000
2025-12-06 22:13.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_40000.d3


Epoch 41/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.59it/s, critic_loss=822, actor_loss=-0.589, bc_loss=2.97]


2025-12-06 22:14.07 [info     ] ReBRAC_20251206214911: epoch=41 step=41000 epoch=41 metrics={'time_sample_batch': 0.01915264821052551, 'time_algorithm_update': 0.012796965599060059, 'critic_loss': 822.4069614868164, 'actor_loss': -0.5892409220933914, 'bc_loss': 2.972486704826355, 'time_step': 0.03220433449745178, 'td_error': 1899.305322614347, 'value_scale': 451.31755569759576, 'discounted_advantage': -694.5708958822338, 'initial_state': 259.467041015625, 'diff_eval': 113474.30846605197} step=41000
2025-12-06 22:14.07 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_41000.d3


Epoch 42/200: 100%|██████████| 1000/1000 [00:33<00:00, 29.80it/s, critic_loss=862, actor_loss=-0.609, bc_loss=2.97]


2025-12-06 22:14.44 [info     ] ReBRAC_20251206214911: epoch=42 step=42000 epoch=42 metrics={'time_sample_batch': 0.01968353533744812, 'time_algorithm_update': 0.013058080911636353, 'critic_loss': 862.7599940185547, 'actor_loss': -0.6094452457427979, 'bc_loss': 2.9715111546516417, 'time_step': 0.0330212197303772, 'td_error': 2511.2082013920426, 'value_scale': 479.24249614993766, 'discounted_advantage': -779.7984841308511, 'initial_state': 271.4909362792969, 'diff_eval': 113474.30846605197} step=42000
2025-12-06 22:14.44 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_42000.d3


Epoch 43/200: 100%|██████████| 1000/1000 [00:33<00:00, 29.68it/s, critic_loss=927, actor_loss=-0.627, bc_loss=2.97]


2025-12-06 22:15.21 [info     ] ReBRAC_20251206214911: epoch=43 step=43000 epoch=43 metrics={'time_sample_batch': 0.019308393716812134, 'time_algorithm_update': 0.01362356948852539, 'critic_loss': 926.7262192382813, 'actor_loss': -0.6267726719379425, 'bc_loss': 2.97418084192276, 'time_step': 0.03319314551353455, 'td_error': 1876.5112181846755, 'value_scale': 497.7740975793729, 'discounted_advantage': -735.0975289905082, 'initial_state': 271.6328430175781, 'diff_eval': 113474.30846605197} step=43000
2025-12-06 22:15.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_43000.d3


Epoch 44/200: 100%|██████████| 1000/1000 [00:32<00:00, 31.16it/s, critic_loss=1.02e+3, actor_loss=-0.662, bc_loss=2.97]


2025-12-06 22:15.56 [info     ] ReBRAC_20251206214911: epoch=44 step=44000 epoch=44 metrics={'time_sample_batch': 0.018816344499588012, 'time_algorithm_update': 0.012535522699356079, 'critic_loss': 1019.9902311401368, 'actor_loss': -0.6618288595676423, 'bc_loss': 2.9711941866874696, 'time_step': 0.03160185647010803, 'td_error': 2570.837735355904, 'value_scale': 532.1149226777811, 'discounted_advantage': -823.5711462827597, 'initial_state': 297.2191162109375, 'diff_eval': 113474.30846605197} step=44000
2025-12-06 22:15.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_44000.d3


Epoch 45/200: 100%|██████████| 1000/1000 [00:33<00:00, 30.02it/s, critic_loss=1.1e+3, actor_loss=-0.702, bc_loss=2.97]


2025-12-06 22:16.33 [info     ] ReBRAC_20251206214911: epoch=45 step=45000 epoch=45 metrics={'time_sample_batch': 0.019748008012771608, 'time_algorithm_update': 0.012763884544372559, 'critic_loss': 1095.975567138672, 'actor_loss': -0.7026493858098983, 'bc_loss': 2.972279099941254, 'time_step': 0.03278027129173279, 'td_error': 3041.8535160382385, 'value_scale': 560.7102019762852, 'discounted_advantage': -900.670899116692, 'initial_state': 305.27288818359375, 'diff_eval': 113474.30846605197} step=45000
2025-12-06 22:16.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_45000.d3


Epoch 46/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.57it/s, critic_loss=1.2e+3, actor_loss=-0.748, bc_loss=2.97]


2025-12-06 22:17.10 [info     ] ReBRAC_20251206214911: epoch=46 step=46000 epoch=46 metrics={'time_sample_batch': 0.019035716772079467, 'time_algorithm_update': 0.012891488790512084, 'critic_loss': 1199.301212524414, 'actor_loss': -0.7478088510036468, 'bc_loss': 2.9716706252098084, 'time_step': 0.0321825921535492, 'td_error': 3138.5308177754496, 'value_scale': 590.7479916042574, 'discounted_advantage': -947.5049465224795, 'initial_state': 299.3382873535156, 'diff_eval': 113474.30846605197} step=46000
2025-12-06 22:17.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_46000.d3


Epoch 47/200: 100%|██████████| 1000/1000 [00:32<00:00, 31.09it/s, critic_loss=1.31e+3, actor_loss=-0.792, bc_loss=2.97]


2025-12-06 22:17.45 [info     ] ReBRAC_20251206214911: epoch=47 step=47000 epoch=47 metrics={'time_sample_batch': 0.018516515016555785, 'time_algorithm_update': 0.012809802293777466, 'critic_loss': 1309.7823298339845, 'actor_loss': -0.7924273575544357, 'bc_loss': 2.9721895699501037, 'time_step': 0.03160178112983704, 'td_error': 4572.004717788793, 'value_scale': 649.2740276241542, 'discounted_advantage': -1074.3482954437413, 'initial_state': 318.4123229980469, 'diff_eval': 113474.30846605197} step=47000
2025-12-06 22:17.45 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_47000.d3


Epoch 48/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.48it/s, critic_loss=1.45e+3, actor_loss=-0.84, bc_loss=2.97]


2025-12-06 22:18.21 [info     ] ReBRAC_20251206214911: epoch=48 step=48000 epoch=48 metrics={'time_sample_batch': 0.019342437982559205, 'time_algorithm_update': 0.012669427633285522, 'critic_loss': 1455.995989501953, 'actor_loss': -0.8401246900558472, 'bc_loss': 2.973363076686859, 'time_step': 0.03228109002113342, 'td_error': 6188.503999668884, 'value_scale': 718.2534275704437, 'discounted_advantage': -1212.5868283720367, 'initial_state': 369.55426025390625, 'diff_eval': 113474.30846605197} step=48000
2025-12-06 22:18.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_48000.d3


Epoch 49/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.83it/s, critic_loss=1.68e+3, actor_loss=-0.875, bc_loss=2.97]


2025-12-06 22:18.57 [info     ] ReBRAC_20251206214911: epoch=49 step=49000 epoch=49 metrics={'time_sample_batch': 0.018994301795959474, 'time_algorithm_update': 0.012687304258346558, 'critic_loss': 1680.5831134033203, 'actor_loss': -0.8752080872058868, 'bc_loss': 2.971222939968109, 'time_step': 0.03194479441642761, 'td_error': 4268.9496738503185, 'value_scale': 746.9703691727158, 'discounted_advantage': -1134.12278913028, 'initial_state': 348.8543701171875, 'diff_eval': 113474.30846605197} step=49000
2025-12-06 22:18.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_49000.d3


Epoch 50/200: 100%|██████████| 1000/1000 [00:31<00:00, 31.77it/s, critic_loss=1.93e+3, actor_loss=-0.895, bc_loss=2.97]


2025-12-06 22:19.32 [info     ] ReBRAC_20251206214911: epoch=50 step=50000 epoch=50 metrics={'time_sample_batch': 0.018343793630599976, 'time_algorithm_update': 0.012387149333953858, 'critic_loss': 1929.1639735107422, 'actor_loss': -0.8954066416025162, 'bc_loss': 2.972967617034912, 'time_step': 0.03098103904724121, 'td_error': 6452.385027746904, 'value_scale': 820.4610015849356, 'discounted_advantage': -1284.4203161172695, 'initial_state': 432.0592956542969, 'diff_eval': 113474.30846605197} step=50000
2025-12-06 22:19.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_50000.d3


Epoch 51/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.67it/s, critic_loss=2.28e+3, actor_loss=-0.909, bc_loss=2.97]


2025-12-06 22:20.08 [info     ] ReBRAC_20251206214911: epoch=51 step=51000 epoch=51 metrics={'time_sample_batch': 0.019182182550430298, 'time_algorithm_update': 0.012650673151016235, 'critic_loss': 2284.318003173828, 'actor_loss': -0.9091071548461914, 'bc_loss': 2.9717173538208006, 'time_step': 0.03209385442733765, 'td_error': 8575.176733867178, 'value_scale': 892.9169580280931, 'discounted_advantage': -1386.9923924780967, 'initial_state': 461.2447509765625, 'diff_eval': 113474.30846605197} step=51000
2025-12-06 22:20.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_51000.d3


Epoch 52/200: 100%|██████████| 1000/1000 [00:31<00:00, 31.38it/s, critic_loss=2.79e+3, actor_loss=-0.913, bc_loss=2.97]


2025-12-06 22:20.43 [info     ] ReBRAC_20251206214911: epoch=52 step=52000 epoch=52 metrics={'time_sample_batch': 0.018706064701080324, 'time_algorithm_update': 0.01238363528251648, 'critic_loss': 2793.342530151367, 'actor_loss': -0.9134021859169006, 'bc_loss': 2.9713547825813293, 'time_step': 0.03134273910522461, 'td_error': 11026.957624720128, 'value_scale': 953.7368034113383, 'discounted_advantage': -1610.0523201725496, 'initial_state': 480.3109130859375, 'diff_eval': 113474.30846605197} step=52000
2025-12-06 22:20.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_52000.d3


Epoch 53/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.93it/s, critic_loss=3.36e+3, actor_loss=-0.916, bc_loss=2.97]


2025-12-06 22:21.19 [info     ] ReBRAC_20251206214911: epoch=53 step=53000 epoch=53 metrics={'time_sample_batch': 0.019067973375320433, 'time_algorithm_update': 0.012492211103439331, 'critic_loss': 3361.158330566406, 'actor_loss': -0.9160440561771392, 'bc_loss': 2.973779064655304, 'time_step': 0.031809966802597046, 'td_error': 12056.00638902321, 'value_scale': 1009.5127065111785, 'discounted_advantage': -1672.8457366425014, 'initial_state': 473.6809387207031, 'diff_eval': 113474.30846605197} step=53000
2025-12-06 22:21.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_53000.d3


Epoch 54/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.94it/s, critic_loss=4e+3, actor_loss=-0.919, bc_loss=2.97]  


2025-12-06 22:21.55 [info     ] ReBRAC_20251206214911: epoch=54 step=54000 epoch=54 metrics={'time_sample_batch': 0.01898923945426941, 'time_algorithm_update': 0.012561821937561035, 'critic_loss': 4002.787290283203, 'actor_loss': -0.9189782621860504, 'bc_loss': 2.971710196018219, 'time_step': 0.03182328057289124, 'td_error': 12173.77816512302, 'value_scale': 1085.5855858791604, 'discounted_advantage': -1730.3463489064807, 'initial_state': 483.271728515625, 'diff_eval': 113474.30846605197} step=54000
2025-12-06 22:21.55 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_54000.d3


Epoch 55/200: 100%|██████████| 1000/1000 [00:32<00:00, 31.04it/s, critic_loss=4.68e+3, actor_loss=-0.92, bc_loss=2.97]


2025-12-06 22:22.30 [info     ] ReBRAC_20251206214911: epoch=55 step=55000 epoch=55 metrics={'time_sample_batch': 0.018896021127700807, 'time_algorithm_update': 0.012546358823776246, 'critic_loss': 4680.184884033203, 'actor_loss': -0.9200812276601792, 'bc_loss': 2.971701714515686, 'time_step': 0.031706056356430055, 'td_error': 15482.955268338463, 'value_scale': 1164.6614890398344, 'discounted_advantage': -1893.2576463925968, 'initial_state': 552.9384155273438, 'diff_eval': 113474.30846605197} step=55000
2025-12-06 22:22.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_55000.d3


Epoch 56/200: 100%|██████████| 1000/1000 [00:33<00:00, 30.24it/s, critic_loss=5.36e+3, actor_loss=-0.918, bc_loss=2.97]


2025-12-06 22:23.07 [info     ] ReBRAC_20251206214911: epoch=56 step=56000 epoch=56 metrics={'time_sample_batch': 0.019509642839431764, 'time_algorithm_update': 0.012795138597488404, 'critic_loss': 5360.796139648438, 'actor_loss': -0.918122727394104, 'bc_loss': 2.97211332654953, 'time_step': 0.032555967569351196, 'td_error': 16206.47684365626, 'value_scale': 1210.1627717038111, 'discounted_advantage': -1951.9265464158113, 'initial_state': 514.576416015625, 'diff_eval': 113474.30846605197} step=56000
2025-12-06 22:23.07 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_56000.d3


Epoch 57/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.88it/s, critic_loss=6e+3, actor_loss=-0.915, bc_loss=2.97]  


2025-12-06 22:23.42 [info     ] ReBRAC_20251206214911: epoch=57 step=57000 epoch=57 metrics={'time_sample_batch': 0.018805381298065187, 'time_algorithm_update': 0.012826850652694703, 'critic_loss': 5997.9190009765625, 'actor_loss': -0.9145454467535019, 'bc_loss': 2.9726881637573244, 'time_step': 0.031887560606002806, 'td_error': 16067.278738729216, 'value_scale': 1256.644900302675, 'discounted_advantage': -2010.1657504064804, 'initial_state': 524.974365234375, 'diff_eval': 113474.30846605197} step=57000
2025-12-06 22:23.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_57000.d3


Epoch 58/200: 100%|██████████| 1000/1000 [00:34<00:00, 29.35it/s, critic_loss=6.84e+3, actor_loss=-0.905, bc_loss=2.97]


2025-12-06 22:24.20 [info     ] ReBRAC_20251206214911: epoch=58 step=58000 epoch=58 metrics={'time_sample_batch': 0.019965959072113036, 'time_algorithm_update': 0.013305461645126342, 'critic_loss': 6846.23338671875, 'actor_loss': -0.9052802715301513, 'bc_loss': 2.9686567006111146, 'time_step': 0.03353940153121948, 'td_error': 24371.339723297424, 'value_scale': 1346.4248308467945, 'discounted_advantage': -2230.822985892632, 'initial_state': 596.9743041992188, 'diff_eval': 113474.30846605197} step=58000
2025-12-06 22:24.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_58000.d3


Epoch 59/200: 100%|██████████| 1000/1000 [00:33<00:00, 30.26it/s, critic_loss=7.91e+3, actor_loss=-0.897, bc_loss=2.97]


2025-12-06 22:24.56 [info     ] ReBRAC_20251206214911: epoch=59 step=59000 epoch=59 metrics={'time_sample_batch': 0.0197719464302063, 'time_algorithm_update': 0.012499242305755616, 'critic_loss': 7920.968535644532, 'actor_loss': -0.8964962565898895, 'bc_loss': 2.970953050136566, 'time_step': 0.03253535890579223, 'td_error': 21056.22913555524, 'value_scale': 1372.7126091116131, 'discounted_advantage': -2175.7896129219607, 'initial_state': 554.1658935546875, 'diff_eval': 113474.30846605197} step=59000
2025-12-06 22:24.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_59000.d3


Epoch 60/200: 100%|██████████| 1000/1000 [00:33<00:00, 29.61it/s, critic_loss=8.94e+3, actor_loss=-0.886, bc_loss=2.97]


2025-12-06 22:25.34 [info     ] ReBRAC_20251206214911: epoch=60 step=60000 epoch=60 metrics={'time_sample_batch': 0.0205125515460968, 'time_algorithm_update': 0.012499333381652832, 'critic_loss': 8948.635047363281, 'actor_loss': -0.8861783518791199, 'bc_loss': 2.9695701856613157, 'time_step': 0.03326504492759705, 'td_error': 21554.08090622237, 'value_scale': 1398.9606584819144, 'discounted_advantage': -2189.8113498605526, 'initial_state': 572.1329956054688, 'diff_eval': 113474.30846605197} step=60000
2025-12-06 22:25.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_60000.d3


Epoch 61/200: 100%|██████████| 1000/1000 [00:33<00:00, 30.11it/s, critic_loss=1.02e+4, actor_loss=-0.877, bc_loss=2.97]


2025-12-06 22:26.12 [info     ] ReBRAC_20251206214911: epoch=61 step=61000 epoch=61 metrics={'time_sample_batch': 0.01944803547859192, 'time_algorithm_update': 0.012941598415374756, 'critic_loss': 10207.701055175781, 'actor_loss': -0.8768353176116943, 'bc_loss': 2.9704127583503723, 'time_step': 0.032663796663284304, 'td_error': 23293.060681464285, 'value_scale': 1409.8341321651415, 'discounted_advantage': -2210.648667933427, 'initial_state': 553.5929565429688, 'diff_eval': 113474.30846605197} step=61000
2025-12-06 22:26.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_61000.d3


Epoch 62/200: 100%|██████████| 1000/1000 [00:33<00:00, 29.45it/s, critic_loss=1.18e+4, actor_loss=-0.867, bc_loss=2.97]


2025-12-06 22:26.49 [info     ] ReBRAC_20251206214911: epoch=62 step=62000 epoch=62 metrics={'time_sample_batch': 0.019985692262649537, 'time_algorithm_update': 0.013088865995407104, 'critic_loss': 11787.754270507812, 'actor_loss': -0.8669231930971145, 'bc_loss': 2.9723169684410093, 'time_step': 0.03334659218788147, 'td_error': 27096.63771121707, 'value_scale': 1472.2479131979194, 'discounted_advantage': -2356.79492249824, 'initial_state': 607.387451171875, 'diff_eval': 113474.30846605197} step=62000
2025-12-06 22:26.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_62000.d3


Epoch 63/200: 100%|██████████| 1000/1000 [00:31<00:00, 31.80it/s, critic_loss=1.34e+4, actor_loss=-0.862, bc_loss=2.97]


2025-12-06 22:27.24 [info     ] ReBRAC_20251206214911: epoch=63 step=63000 epoch=63 metrics={'time_sample_batch': 0.018345011711120605, 'time_algorithm_update': 0.012368569374084473, 'critic_loss': 13416.323989257813, 'actor_loss': -0.8615928140878677, 'bc_loss': 2.972763913154602, 'time_step': 0.030968857288360595, 'td_error': 24468.5834141235, 'value_scale': 1463.1379042838864, 'discounted_advantage': -2221.415557365541, 'initial_state': 499.9111633300781, 'diff_eval': 113474.30846605197} step=63000
2025-12-06 22:27.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_63000.d3


Epoch 64/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.45it/s, critic_loss=1.49e+4, actor_loss=-0.857, bc_loss=2.97]


2025-12-06 22:28.01 [info     ] ReBRAC_20251206214911: epoch=64 step=64000 epoch=64 metrics={'time_sample_batch': 0.019461843729019164, 'time_algorithm_update': 0.012601361751556397, 'critic_loss': 14932.694806640626, 'actor_loss': -0.856470852971077, 'bc_loss': 2.9727817730903627, 'time_step': 0.0323218629360199, 'td_error': 30017.26090796899, 'value_scale': 1516.8304114347732, 'discounted_advantage': -2414.905246029436, 'initial_state': 482.8928527832031, 'diff_eval': 113474.30846605197} step=64000
2025-12-06 22:28.01 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_64000.d3


Epoch 65/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.49it/s, critic_loss=1.63e+4, actor_loss=-0.857, bc_loss=2.97]


2025-12-06 22:28.37 [info     ] ReBRAC_20251206214911: epoch=65 step=65000 epoch=65 metrics={'time_sample_batch': 0.019307196855545043, 'time_algorithm_update': 0.012734459638595581, 'critic_loss': 16300.279626953125, 'actor_loss': -0.8568784608840943, 'bc_loss': 2.9732905626296997, 'time_step': 0.032292249202728274, 'td_error': 29564.48871118769, 'value_scale': 1548.922841643248, 'discounted_advantage': -2405.121966275552, 'initial_state': 518.870361328125, 'diff_eval': 113474.30846605197} step=65000
2025-12-06 22:28.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_65000.d3


Epoch 66/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.59it/s, critic_loss=1.73e+4, actor_loss=-0.855, bc_loss=2.97]


2025-12-06 22:29.13 [info     ] ReBRAC_20251206214911: epoch=66 step=66000 epoch=66 metrics={'time_sample_batch': 0.019348803520202636, 'time_algorithm_update': 0.01260475492477417, 'critic_loss': 17325.38607519531, 'actor_loss': -0.8547840157747268, 'bc_loss': 2.9721049070358276, 'time_step': 0.0322053804397583, 'td_error': 37862.03539162433, 'value_scale': 1592.9249851409868, 'discounted_advantage': -2635.0150353226754, 'initial_state': 531.850341796875, 'diff_eval': 113474.30846605197} step=66000
2025-12-06 22:29.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_66000.d3


Epoch 67/200: 100%|██████████| 1000/1000 [00:32<00:00, 31.13it/s, critic_loss=1.81e+4, actor_loss=-0.859, bc_loss=2.97]


2025-12-06 22:29.48 [info     ] ReBRAC_20251206214911: epoch=67 step=67000 epoch=67 metrics={'time_sample_batch': 0.01882789421081543, 'time_algorithm_update': 0.012523452043533325, 'critic_loss': 18059.93086230469, 'actor_loss': -0.859169954419136, 'bc_loss': 2.9711050157547, 'time_step': 0.03161128544807434, 'td_error': 31413.09000016049, 'value_scale': 1596.0373216526707, 'discounted_advantage': -2528.0858694275785, 'initial_state': 493.56329345703125, 'diff_eval': 113474.30846605197} step=67000
2025-12-06 22:29.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_67000.d3


Epoch 68/200: 100%|██████████| 1000/1000 [00:33<00:00, 30.29it/s, critic_loss=1.88e+4, actor_loss=-0.854, bc_loss=2.97]


2025-12-06 22:30.25 [info     ] ReBRAC_20251206214911: epoch=68 step=68000 epoch=68 metrics={'time_sample_batch': 0.019227223873138428, 'time_algorithm_update': 0.012965694427490234, 'critic_loss': 18784.616947265626, 'actor_loss': -0.853459857583046, 'bc_loss': 2.9719030447006225, 'time_step': 0.03246738171577453, 'td_error': 42588.26586714896, 'value_scale': 1691.4422005228105, 'discounted_advantage': -2824.757690296824, 'initial_state': 477.29461669921875, 'diff_eval': 113474.30846605197} step=68000
2025-12-06 22:30.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_68000.d3


Epoch 69/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.96it/s, critic_loss=1.98e+4, actor_loss=-0.851, bc_loss=2.97]


2025-12-06 22:31.00 [info     ] ReBRAC_20251206214911: epoch=69 step=69000 epoch=69 metrics={'time_sample_batch': 0.018933497667312622, 'time_algorithm_update': 0.012591447114944459, 'critic_loss': 19824.713087890625, 'actor_loss': -0.851076149225235, 'bc_loss': 2.9731166954040527, 'time_step': 0.03178659892082215, 'td_error': 39318.690601842114, 'value_scale': 1700.3954444169399, 'discounted_advantage': -2645.067259561632, 'initial_state': 465.0078125, 'diff_eval': 113474.30846605197} step=69000
2025-12-06 22:31.01 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_69000.d3


Epoch 70/200: 100%|██████████| 1000/1000 [00:33<00:00, 29.92it/s, critic_loss=2.07e+4, actor_loss=-0.844, bc_loss=2.97]


2025-12-06 22:31.37 [info     ] ReBRAC_20251206214911: epoch=70 step=70000 epoch=70 metrics={'time_sample_batch': 0.019711536169052125, 'time_algorithm_update': 0.012947458744049073, 'critic_loss': 20696.985283203125, 'actor_loss': -0.844592131972313, 'bc_loss': 2.9722890267372133, 'time_step': 0.032905839204788206, 'td_error': 55009.82556871662, 'value_scale': 1775.4358025564432, 'discounted_advantage': -3112.858035344307, 'initial_state': 490.7578125, 'diff_eval': 113474.30846605197} step=70000
2025-12-06 22:31.38 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_70000.d3


Epoch 71/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.65it/s, critic_loss=2.17e+4, actor_loss=-0.836, bc_loss=2.97]


2025-12-06 22:32.13 [info     ] ReBRAC_20251206214911: epoch=71 step=71000 epoch=71 metrics={'time_sample_batch': 0.01912833762168884, 'time_algorithm_update': 0.012747648477554321, 'critic_loss': 21670.846650390624, 'actor_loss': -0.8359347223043442, 'bc_loss': 2.9728059487342833, 'time_step': 0.0321349241733551, 'td_error': 45470.871904359796, 'value_scale': 1802.2371374685426, 'discounted_advantage': -2921.0100036797553, 'initial_state': 448.9894104003906, 'diff_eval': 113474.30846605197} step=71000
2025-12-06 22:32.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_71000.d3


Epoch 72/200: 100%|██████████| 1000/1000 [00:33<00:00, 30.20it/s, critic_loss=2.35e+4, actor_loss=-0.822, bc_loss=2.97]


2025-12-06 22:32.50 [info     ] ReBRAC_20251206214911: epoch=72 step=72000 epoch=72 metrics={'time_sample_batch': 0.019423874855041504, 'time_algorithm_update': 0.012926707983016967, 'critic_loss': 23465.16229296875, 'actor_loss': -0.8222810996770858, 'bc_loss': 2.971089286804199, 'time_step': 0.03261272120475769, 'td_error': 45231.561273761574, 'value_scale': 1814.4498841984373, 'discounted_advantage': -2975.813562685314, 'initial_state': 372.0096740722656, 'diff_eval': 113474.30846605197} step=72000
2025-12-06 22:32.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_72000.d3


Epoch 73/200: 100%|██████████| 1000/1000 [00:33<00:00, 29.74it/s, critic_loss=2.51e+4, actor_loss=-0.808, bc_loss=2.97]


2025-12-06 22:33.28 [info     ] ReBRAC_20251206214911: epoch=73 step=73000 epoch=73 metrics={'time_sample_batch': 0.019451460599899294, 'time_algorithm_update': 0.013403972387313843, 'critic_loss': 25063.0997421875, 'actor_loss': -0.8081697567701339, 'bc_loss': 2.97091553401947, 'time_step': 0.03310771298408508, 'td_error': 42385.340043580196, 'value_scale': 1821.3502958892577, 'discounted_advantage': -2917.43439376928, 'initial_state': 396.5733947753906, 'diff_eval': 113474.30846605197} step=73000
2025-12-06 22:33.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_73000.d3


Epoch 74/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.31it/s, critic_loss=2.7e+4, actor_loss=-0.792, bc_loss=2.97]


2025-12-06 22:34.04 [info     ] ReBRAC_20251206214911: epoch=74 step=74000 epoch=74 metrics={'time_sample_batch': 0.019423864126205444, 'time_algorithm_update': 0.012754860401153565, 'critic_loss': 26971.03869140625, 'actor_loss': -0.7914032917022705, 'bc_loss': 2.9724797382354735, 'time_step': 0.032441243410110475, 'td_error': 51674.97219264753, 'value_scale': 1879.1425069663471, 'discounted_advantage': -3076.027066393817, 'initial_state': 379.4574890136719, 'diff_eval': 113474.30846605197} step=74000
2025-12-06 22:34.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_74000.d3


Epoch 75/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.69it/s, critic_loss=2.81e+4, actor_loss=-0.772, bc_loss=2.97]


2025-12-06 22:34.40 [info     ] ReBRAC_20251206214911: epoch=75 step=75000 epoch=75 metrics={'time_sample_batch': 0.019102466106414796, 'time_algorithm_update': 0.012758677959442138, 'critic_loss': 28071.14684765625, 'actor_loss': -0.771519869685173, 'bc_loss': 2.971897644996643, 'time_step': 0.032108003854751585, 'td_error': 58418.76779404881, 'value_scale': 1884.2725327076876, 'discounted_advantage': -3146.885553906284, 'initial_state': 337.50811767578125, 'diff_eval': 113474.30846605197} step=75000
2025-12-06 22:34.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_75000.d3


Epoch 76/200: 100%|██████████| 1000/1000 [00:34<00:00, 29.17it/s, critic_loss=2.96e+4, actor_loss=-0.743, bc_loss=2.97]


2025-12-06 22:35.18 [info     ] ReBRAC_20251206214911: epoch=76 step=76000 epoch=76 metrics={'time_sample_batch': 0.020615289449691772, 'time_algorithm_update': 0.012855249643325805, 'critic_loss': 29555.5641953125, 'actor_loss': -0.742798507809639, 'bc_loss': 2.9743669595718383, 'time_step': 0.03374444079399109, 'td_error': 48629.0080602577, 'value_scale': 1864.2583287307823, 'discounted_advantage': -3032.8768436995692, 'initial_state': 361.0612487792969, 'diff_eval': 113474.30846605197} step=76000
2025-12-06 22:35.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_76000.d3


Epoch 77/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.73it/s, critic_loss=3.09e+4, actor_loss=-0.706, bc_loss=2.97]


2025-12-06 22:35.54 [info     ] ReBRAC_20251206214911: epoch=77 step=77000 epoch=77 metrics={'time_sample_batch': 0.019067874670028687, 'time_algorithm_update': 0.012670311689376832, 'critic_loss': 30917.24451171875, 'actor_loss': -0.7058490920066833, 'bc_loss': 2.9736052746772765, 'time_step': 0.03201708936691284, 'td_error': 61708.90106491227, 'value_scale': 1880.8431605557284, 'discounted_advantage': -3244.860615899393, 'initial_state': 382.6476135253906, 'diff_eval': 113474.30846605197} step=77000
2025-12-06 22:35.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_77000.d3


Epoch 78/200: 100%|██████████| 1000/1000 [00:31<00:00, 31.40it/s, critic_loss=3.31e+4, actor_loss=-0.66, bc_loss=2.97]


2025-12-06 22:36.29 [info     ] ReBRAC_20251206214911: epoch=78 step=78000 epoch=78 metrics={'time_sample_batch': 0.018772529363632204, 'time_algorithm_update': 0.012335307836532593, 'critic_loss': 33105.6850625, 'actor_loss': -0.6600581188201904, 'bc_loss': 2.972963712692261, 'time_step': 0.031358452081680296, 'td_error': 43311.88184790307, 'value_scale': 1826.5754698491955, 'discounted_advantage': -2766.802234448381, 'initial_state': 320.2466735839844, 'diff_eval': 113474.30846605197} step=78000
2025-12-06 22:36.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_78000.d3


Epoch 79/200: 100%|██████████| 1000/1000 [00:33<00:00, 30.28it/s, critic_loss=3.59e+4, actor_loss=-0.596, bc_loss=2.97]


2025-12-06 22:37.05 [info     ] ReBRAC_20251206214911: epoch=79 step=79000 epoch=79 metrics={'time_sample_batch': 0.01941965889930725, 'time_algorithm_update': 0.012813056945800781, 'critic_loss': 35887.01927734375, 'actor_loss': -0.5953682121038437, 'bc_loss': 2.972390345096588, 'time_step': 0.03250388765335083, 'td_error': 69082.15492957878, 'value_scale': 1838.725884880424, 'discounted_advantage': -3256.247504473806, 'initial_state': 234.93572998046875, 'diff_eval': 113474.30846605197} step=79000
2025-12-06 22:37.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_79000.d3


Epoch 80/200: 100%|██████████| 1000/1000 [00:33<00:00, 29.90it/s, critic_loss=3.79e+4, actor_loss=-0.533, bc_loss=2.97]


2025-12-06 22:37.42 [info     ] ReBRAC_20251206214911: epoch=80 step=80000 epoch=80 metrics={'time_sample_batch': 0.019691282033920288, 'time_algorithm_update': 0.012956523656845093, 'critic_loss': 37901.63111914063, 'actor_loss': -0.5333397296071053, 'bc_loss': 2.970190176486969, 'time_step': 0.032915268898010254, 'td_error': 52668.72097610232, 'value_scale': 1763.638791886679, 'discounted_advantage': -2818.023352136148, 'initial_state': 151.20965576171875, 'diff_eval': 113474.30846605197} step=80000
2025-12-06 22:37.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_80000.d3


Epoch 81/200: 100%|██████████| 1000/1000 [00:31<00:00, 31.65it/s, critic_loss=4.03e+4, actor_loss=-0.455, bc_loss=2.97]


2025-12-06 22:38.17 [info     ] ReBRAC_20251206214911: epoch=81 step=81000 epoch=81 metrics={'time_sample_batch': 0.018336306571960448, 'time_algorithm_update': 0.012515475749969482, 'critic_loss': 40332.72646289063, 'actor_loss': -0.45453543883562086, 'bc_loss': 2.971939539909363, 'time_step': 0.03110986304283142, 'td_error': 55818.68144675704, 'value_scale': 1729.6452923424586, 'discounted_advantage': -2783.841007981571, 'initial_state': 96.27838897705078, 'diff_eval': 113474.30846605197} step=81000
2025-12-06 22:38.17 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_81000.d3


Epoch 82/200: 100%|██████████| 1000/1000 [00:33<00:00, 30.10it/s, critic_loss=4.24e+4, actor_loss=-0.358, bc_loss=2.97]


2025-12-06 22:38.54 [info     ] ReBRAC_20251206214911: epoch=82 step=82000 epoch=82 metrics={'time_sample_batch': 0.019473399877548217, 'time_algorithm_update': 0.012939427852630615, 'critic_loss': 42381.874306640624, 'actor_loss': -0.35815384912490844, 'bc_loss': 2.9728996262550353, 'time_step': 0.03269153594970703, 'td_error': 53418.695437290946, 'value_scale': 1620.6354605262256, 'discounted_advantage': -2593.581316342946, 'initial_state': -173.7652587890625, 'diff_eval': 113474.30846605197} step=82000
2025-12-06 22:38.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_82000.d3


Epoch 83/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.65it/s, critic_loss=4.54e+4, actor_loss=-0.287, bc_loss=2.97]


2025-12-06 22:39.30 [info     ] ReBRAC_20251206214911: epoch=83 step=83000 epoch=83 metrics={'time_sample_batch': 0.019130619525909425, 'time_algorithm_update': 0.012751754522323609, 'critic_loss': 45433.77318164062, 'actor_loss': -0.28665785044431685, 'bc_loss': 2.9712088265419005, 'time_step': 0.03212745809555054, 'td_error': 65344.94808148485, 'value_scale': 1584.7484295544061, 'discounted_advantage': -2840.4303965778827, 'initial_state': -297.209716796875, 'diff_eval': 113474.30846605197} step=83000
2025-12-06 22:39.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_83000.d3


Epoch 84/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.48it/s, critic_loss=4.85e+4, actor_loss=-0.178, bc_loss=2.97]


2025-12-06 22:40.06 [info     ] ReBRAC_20251206214911: epoch=84 step=84000 epoch=84 metrics={'time_sample_batch': 0.01930567693710327, 'time_algorithm_update': 0.012705920696258545, 'critic_loss': 48474.25931445313, 'actor_loss': -0.17736630718037485, 'bc_loss': 2.9723107523918153, 'time_step': 0.03228014516830444, 'td_error': 59762.59886879307, 'value_scale': 1533.1482977551366, 'discounted_advantage': -2608.0717438580427, 'initial_state': -313.8309631347656, 'diff_eval': 113474.30846605197} step=84000
2025-12-06 22:40.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_84000.d3


Epoch 85/200: 100%|██████████| 1000/1000 [00:33<00:00, 29.95it/s, critic_loss=5.02e+4, actor_loss=-0.0428, bc_loss=2.97]


2025-12-06 22:40.43 [info     ] ReBRAC_20251206214911: epoch=85 step=85000 epoch=85 metrics={'time_sample_batch': 0.019499761104583742, 'time_algorithm_update': 0.013016031503677368, 'critic_loss': 50162.34893554688, 'actor_loss': -0.0421582119744271, 'bc_loss': 2.9730994348526, 'time_step': 0.03280149221420288, 'td_error': 65852.69217997539, 'value_scale': 1519.3092519799302, 'discounted_advantage': -2666.5798581401646, 'initial_state': -361.8510437011719, 'diff_eval': 113474.30846605197} step=85000
2025-12-06 22:40.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_85000.d3


Epoch 86/200: 100%|██████████| 1000/1000 [00:33<00:00, 30.13it/s, critic_loss=5.28e+4, actor_loss=0.0976, bc_loss=2.97]


2025-12-06 22:41.20 [info     ] ReBRAC_20251206214911: epoch=86 step=86000 epoch=86 metrics={'time_sample_batch': 0.019485727548599244, 'time_algorithm_update': 0.012864940643310547, 'critic_loss': 52814.45642578125, 'actor_loss': 0.09835714399255813, 'bc_loss': 2.971770857810974, 'time_step': 0.0326308901309967, 'td_error': 75142.22139295419, 'value_scale': 1522.2649250076483, 'discounted_advantage': -3014.583567484054, 'initial_state': -613.9539184570312, 'diff_eval': 113474.30846605197} step=86000
2025-12-06 22:41.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_86000.d3


Epoch 87/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.81it/s, critic_loss=5.5e+4, actor_loss=0.166, bc_loss=2.97]


2025-12-06 22:41.56 [info     ] ReBRAC_20251206214911: epoch=87 step=87000 epoch=87 metrics={'time_sample_batch': 0.019529179334640503, 'time_algorithm_update': 0.012131913185119629, 'critic_loss': 55009.3391015625, 'actor_loss': 0.16609603771567344, 'bc_loss': 2.9712464385032655, 'time_step': 0.031928840398788454, 'td_error': 114889.95447618944, 'value_scale': 1723.775095062104, 'discounted_advantage': -3734.365026045143, 'initial_state': -738.6398315429688, 'diff_eval': 113474.30846605197} step=87000
2025-12-06 22:41.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_87000.d3


Epoch 88/200: 100%|██████████| 1000/1000 [00:33<00:00, 29.44it/s, critic_loss=5.7e+4, actor_loss=0.196, bc_loss=2.97]


2025-12-06 22:42.33 [info     ] ReBRAC_20251206214911: epoch=88 step=88000 epoch=88 metrics={'time_sample_batch': 0.019185355663299562, 'time_algorithm_update': 0.01397959017753601, 'critic_loss': 57017.475546875, 'actor_loss': 0.19542160046845675, 'bc_loss': 2.9720577931404115, 'time_step': 0.033427530765533446, 'td_error': 124908.72153602274, 'value_scale': 1852.7573555620997, 'discounted_advantage': -3933.9974827135475, 'initial_state': -876.7366943359375, 'diff_eval': 113474.30846605197} step=88000
2025-12-06 22:42.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_88000.d3


Epoch 89/200: 100%|██████████| 1000/1000 [00:33<00:00, 30.21it/s, critic_loss=6.05e+4, actor_loss=0.181, bc_loss=2.97]


2025-12-06 22:43.10 [info     ] ReBRAC_20251206214911: epoch=89 step=89000 epoch=89 metrics={'time_sample_batch': 0.019407982587814333, 'time_algorithm_update': 0.012899988174438477, 'critic_loss': 60556.31746484375, 'actor_loss': 0.1808889628574252, 'bc_loss': 2.9723620252609253, 'time_step': 0.03258204197883606, 'td_error': 153641.43566384763, 'value_scale': 2149.733982472288, 'discounted_advantage': -4568.219166572133, 'initial_state': -727.3824462890625, 'diff_eval': 113474.30846605197} step=89000
2025-12-06 22:43.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_89000.d3


Epoch 90/200: 100%|██████████| 1000/1000 [00:32<00:00, 31.22it/s, critic_loss=6.56e+4, actor_loss=0.163, bc_loss=2.97]


2025-12-06 22:43.46 [info     ] ReBRAC_20251206214911: epoch=90 step=90000 epoch=90 metrics={'time_sample_batch': 0.018886106252670287, 'time_algorithm_update': 0.012390641927719116, 'critic_loss': 65643.52896875, 'actor_loss': 0.16205577977746724, 'bc_loss': 2.971926255226135, 'time_step': 0.03153101134300232, 'td_error': 240127.62547868246, 'value_scale': 2481.824280326944, 'discounted_advantage': -5503.018176220758, 'initial_state': -566.898193359375, 'diff_eval': 113474.30846605197} step=90000
2025-12-06 22:43.46 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_90000.d3


Epoch 91/200: 100%|██████████| 1000/1000 [00:33<00:00, 29.70it/s, critic_loss=6.99e+4, actor_loss=0.112, bc_loss=2.97]


2025-12-06 22:44.22 [info     ] ReBRAC_20251206214911: epoch=91 step=91000 epoch=91 metrics={'time_sample_batch': 0.01919127011299133, 'time_algorithm_update': 0.013735246658325196, 'critic_loss': 69985.66512109376, 'actor_loss': 0.11159103503171354, 'bc_loss': 2.9713393988609313, 'time_step': 0.033185604810714725, 'td_error': 195061.36132986686, 'value_scale': 2701.8746810163257, 'discounted_advantage': -5315.290205092587, 'initial_state': -579.137939453125, 'diff_eval': 113474.30846605197} step=91000
2025-12-06 22:44.23 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_91000.d3


Epoch 92/200: 100%|██████████| 1000/1000 [00:33<00:00, 29.96it/s, critic_loss=7.5e+4, actor_loss=0.0535, bc_loss=2.97]


2025-12-06 22:44.59 [info     ] ReBRAC_20251206214911: epoch=92 step=92000 epoch=92 metrics={'time_sample_batch': 0.019606807231903077, 'time_algorithm_update': 0.01298073434829712, 'critic_loss': 75102.174953125, 'actor_loss': 0.053210429459344594, 'bc_loss': 2.9713317527770995, 'time_step': 0.032850153684616086, 'td_error': 218798.6734109665, 'value_scale': 3052.11170541151, 'discounted_advantage': -5847.067398194241, 'initial_state': -391.9360046386719, 'diff_eval': 113474.30846605197} step=92000
2025-12-06 22:44.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_92000.d3


Epoch 93/200: 100%|██████████| 1000/1000 [00:32<00:00, 31.10it/s, critic_loss=8.2e+4, actor_loss=-0.0104, bc_loss=2.97]  


2025-12-06 22:45.35 [info     ] ReBRAC_20251206214911: epoch=93 step=93000 epoch=93 metrics={'time_sample_batch': 0.01884164881706238, 'time_algorithm_update': 0.012566468954086304, 'critic_loss': 81986.982796875, 'actor_loss': -0.010127250961959362, 'bc_loss': 2.9721640577316286, 'time_step': 0.03167713832855225, 'td_error': 262124.8659210532, 'value_scale': 3422.6459497238343, 'discounted_advantage': -6431.459119017198, 'initial_state': -337.51800537109375, 'diff_eval': 113474.30846605197} step=93000
2025-12-06 22:45.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_93000.d3


Epoch 94/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.93it/s, critic_loss=8.91e+4, actor_loss=-0.0721, bc_loss=2.97]


2025-12-06 22:46.11 [info     ] ReBRAC_20251206214911: epoch=94 step=94000 epoch=94 metrics={'time_sample_batch': 0.01884069871902466, 'time_algorithm_update': 0.012736116886138916, 'critic_loss': 89194.5905703125, 'actor_loss': -0.07273529728408903, 'bc_loss': 2.971969196796417, 'time_step': 0.031834223031997684, 'td_error': 272687.83161025646, 'value_scale': 3832.305372201387, 'discounted_advantage': -7056.745521112363, 'initial_state': -22.486112594604492, 'diff_eval': 113474.30846605197} step=94000
2025-12-06 22:46.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_94000.d3


Epoch 95/200: 100%|██████████| 1000/1000 [00:33<00:00, 30.09it/s, critic_loss=9.62e+4, actor_loss=-0.146, bc_loss=2.97]


2025-12-06 22:46.47 [info     ] ReBRAC_20251206214911: epoch=95 step=95000 epoch=95 metrics={'time_sample_batch': 0.019527261257171632, 'time_algorithm_update': 0.012920920848846435, 'critic_loss': 96205.25577734374, 'actor_loss': -0.1460156867094338, 'bc_loss': 2.9728191857337953, 'time_step': 0.03271292161941528, 'td_error': 287700.25436734536, 'value_scale': 4285.5398157140535, 'discounted_advantage': -7428.114175343594, 'initial_state': 161.77296447753906, 'diff_eval': 113474.30846605197} step=95000
2025-12-06 22:46.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_95000.d3


Epoch 96/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.78it/s, critic_loss=1.06e+5, actor_loss=-0.197, bc_loss=2.97]


2025-12-06 22:47.23 [info     ] ReBRAC_20251206214911: epoch=96 step=96000 epoch=96 metrics={'time_sample_batch': 0.01920796275138855, 'time_algorithm_update': 0.01251356863975525, 'critic_loss': 106296.7250078125, 'actor_loss': -0.19766980012506247, 'bc_loss': 2.972048875808716, 'time_step': 0.03197475600242615, 'td_error': 349881.50970957096, 'value_scale': 4783.425040185101, 'discounted_advantage': -8053.263585465861, 'initial_state': 549.622802734375, 'diff_eval': 113474.30846605197} step=96000
2025-12-06 22:47.23 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_96000.d3


Epoch 97/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.53it/s, critic_loss=1.16e+5, actor_loss=-0.259, bc_loss=2.97]


2025-12-06 22:47.59 [info     ] ReBRAC_20251206214911: epoch=97 step=97000 epoch=97 metrics={'time_sample_batch': 0.019201937675476073, 'time_algorithm_update': 0.012746818780899049, 'critic_loss': 116315.1476875, 'actor_loss': -0.25908820345997813, 'bc_loss': 2.9724499940872193, 'time_step': 0.032216006755828856, 'td_error': 350771.8250579175, 'value_scale': 5239.545921768547, 'discounted_advantage': -8558.471203500625, 'initial_state': 653.9242553710938, 'diff_eval': 113474.30846605197} step=97000
2025-12-06 22:47.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_97000.d3


Epoch 98/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.50it/s, critic_loss=1.29e+5, actor_loss=-0.327, bc_loss=2.97]


2025-12-06 22:48.35 [info     ] ReBRAC_20251206214911: epoch=98 step=98000 epoch=98 metrics={'time_sample_batch': 0.019256028413772584, 'time_algorithm_update': 0.012773784637451172, 'critic_loss': 129501.7687890625, 'actor_loss': -0.3271283961236477, 'bc_loss': 2.9739240469932557, 'time_step': 0.032293517112731936, 'td_error': 502428.8198158603, 'value_scale': 5871.2986193142915, 'discounted_advantage': -10131.643641024315, 'initial_state': 1019.0875244140625, 'diff_eval': 113474.30846605197} step=98000
2025-12-06 22:48.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_98000.d3


Epoch 99/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.64it/s, critic_loss=1.47e+5, actor_loss=-0.511, bc_loss=2.21]


2025-12-06 22:49.11 [info     ] ReBRAC_20251206214911: epoch=99 step=99000 epoch=99 metrics={'time_sample_batch': 0.019133683204650878, 'time_algorithm_update': 0.012689502239227294, 'critic_loss': 147423.33021875, 'actor_loss': -0.5128316022753715, 'bc_loss': 2.2013203142881395, 'time_step': 0.03209858012199402, 'td_error': 955799.1407698106, 'value_scale': 6676.857041559995, 'discounted_advantage': -11300.872951300416, 'initial_state': 2206.90673828125, 'diff_eval': 36518.98975593965} step=99000
2025-12-06 22:49.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_99000.d3


Epoch 100/200: 100%|██████████| 1000/1000 [00:32<00:00, 31.14it/s, critic_loss=1.01e+6, actor_loss=-0.888, bc_loss=0.622]


2025-12-06 22:49.47 [info     ] ReBRAC_20251206214911: epoch=100 step=100000 epoch=100 metrics={'time_sample_batch': 0.018810638189315797, 'time_algorithm_update': 0.01255226159095764, 'critic_loss': 1025381.83634375, 'actor_loss': -0.888076143026352, 'bc_loss': 0.617867950975895, 'time_step': 0.03160939145088196, 'td_error': 459102.1781808152, 'value_scale': 6807.388899299544, 'discounted_advantage': -9339.417123650268, 'initial_state': 3250.579345703125, 'diff_eval': 8199.441602481409} step=100000
2025-12-06 22:49.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_100000.d3


Epoch 101/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.75it/s, critic_loss=2.99e+6, actor_loss=-0.955, bc_loss=0.161]


2025-12-06 22:50.23 [info     ] ReBRAC_20251206214911: epoch=101 step=101000 epoch=101 metrics={'time_sample_batch': 0.019096961498260498, 'time_algorithm_update': 0.012641620874404907, 'critic_loss': 2972777.802625, 'actor_loss': -0.9552567783594131, 'bc_loss': 0.16119707277417183, 'time_step': 0.03199927687644959, 'td_error': 336318.4122104124, 'value_scale': 5689.608060122935, 'discounted_advantage': -7703.123501816629, 'initial_state': 2767.79541015625, 'diff_eval': 10696.54093121518} step=101000
2025-12-06 22:50.23 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_101000.d3


Epoch 102/200: 100%|██████████| 1000/1000 [00:33<00:00, 29.76it/s, critic_loss=1.34e+6, actor_loss=-0.972, bc_loss=0.126]


2025-12-06 22:51.00 [info     ] ReBRAC_20251206214911: epoch=102 step=102000 epoch=102 metrics={'time_sample_batch': 0.019769124031066895, 'time_algorithm_update': 0.01298955750465393, 'critic_loss': 1339424.1025625, 'actor_loss': -0.9721853498220444, 'bc_loss': 0.12644695487618446, 'time_step': 0.033040040731430055, 'td_error': 376776.9298406347, 'value_scale': 4887.249815284956, 'discounted_advantage': -6715.33317918553, 'initial_state': 2232.260498046875, 'diff_eval': 4911.406095961293} step=102000
2025-12-06 22:51.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_102000.d3


Epoch 103/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.73it/s, critic_loss=1.06e+6, actor_loss=-0.971, bc_loss=0.127]


2025-12-06 22:51.36 [info     ] ReBRAC_20251206214911: epoch=103 step=103000 epoch=103 metrics={'time_sample_batch': 0.01912816333770752, 'time_algorithm_update': 0.012596768140792847, 'critic_loss': 1059819.23415625, 'actor_loss': -0.9713113414049148, 'bc_loss': 0.12726700636744498, 'time_step': 0.0319954023361206, 'td_error': 297970.4418566855, 'value_scale': 4533.095489420415, 'discounted_advantage': -6346.904982890418, 'initial_state': 2057.165771484375, 'diff_eval': 4577.892981548009} step=103000
2025-12-06 22:51.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_103000.d3


Epoch 104/200: 100%|██████████| 1000/1000 [00:31<00:00, 31.42it/s, critic_loss=5.8e+5, actor_loss=-0.96, bc_loss=0.206] 


2025-12-06 22:52.11 [info     ] ReBRAC_20251206214911: epoch=104 step=104000 epoch=104 metrics={'time_sample_batch': 0.01865637469291687, 'time_algorithm_update': 0.012397548913955688, 'critic_loss': 579024.35721875, 'actor_loss': -0.9594652915000915, 'bc_loss': 0.20611415825784207, 'time_step': 0.0313126437664032, 'td_error': 296400.3646362807, 'value_scale': 4481.173309358947, 'discounted_advantage': -6877.356308372031, 'initial_state': 2271.596923828125, 'diff_eval': 18557.80424381765} step=104000
2025-12-06 22:52.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_104000.d3


Epoch 105/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.56it/s, critic_loss=3.85e+5, actor_loss=-0.95, bc_loss=0.248]


2025-12-06 22:52.47 [info     ] ReBRAC_20251206214911: epoch=105 step=105000 epoch=105 metrics={'time_sample_batch': 0.01933842635154724, 'time_algorithm_update': 0.012585828304290772, 'critic_loss': 384044.718625, 'actor_loss': -0.9499495526552201, 'bc_loss': 0.24760360488295555, 'time_step': 0.032177556753158566, 'td_error': 300546.64595244464, 'value_scale': 4692.141464778384, 'discounted_advantage': -7272.330965623281, 'initial_state': 2348.861328125, 'diff_eval': 19514.005803454216} step=105000
2025-12-06 22:52.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_105000.d3


Epoch 106/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.48it/s, critic_loss=2.81e+5, actor_loss=-0.952, bc_loss=0.259]


2025-12-06 22:53.24 [info     ] ReBRAC_20251206214911: epoch=106 step=106000 epoch=106 metrics={'time_sample_batch': 0.01940264081954956, 'time_algorithm_update': 0.012645637512207031, 'critic_loss': 280762.636796875, 'actor_loss': -0.9520669990777969, 'bc_loss': 0.25906069433689116, 'time_step': 0.032313551187515256, 'td_error': 372292.6132909731, 'value_scale': 4867.441607691117, 'discounted_advantage': -8012.89850352468, 'initial_state': 2319.906005859375, 'diff_eval': 17845.907987518687} step=106000
2025-12-06 22:53.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_106000.d3


Epoch 107/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.32it/s, critic_loss=3.91e+5, actor_loss=-0.943, bc_loss=0.354]


2025-12-06 22:54.00 [info     ] ReBRAC_20251206214911: epoch=107 step=107000 epoch=107 metrics={'time_sample_batch': 0.019372947454452515, 'time_algorithm_update': 0.012837215900421143, 'critic_loss': 388329.5087265625, 'actor_loss': -0.942968538403511, 'bc_loss': 0.35631052041053773, 'time_step': 0.03247077226638794, 'td_error': 561375.9449769156, 'value_scale': 5363.269138126741, 'discounted_advantage': -9523.013137332833, 'initial_state': 2769.887451171875, 'diff_eval': 64646.464284956994} step=107000
2025-12-06 22:54.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_107000.d3


Epoch 108/200: 100%|██████████| 1000/1000 [00:33<00:00, 30.13it/s, critic_loss=1.06e+5, actor_loss=-0.964, bc_loss=0.687]


2025-12-06 22:54.37 [info     ] ReBRAC_20251206214911: epoch=108 step=108000 epoch=108 metrics={'time_sample_batch': 0.01955034589767456, 'time_algorithm_update': 0.012885288953781129, 'critic_loss': 105790.283609375, 'actor_loss': -0.9644807834625244, 'bc_loss': 0.6874145754575729, 'time_step': 0.03268389821052551, 'td_error': 485477.01936466136, 'value_scale': 6228.115563988186, 'discounted_advantage': -10293.235341873542, 'initial_state': 3477.482421875, 'diff_eval': 68124.76543163824} step=108000
2025-12-06 22:54.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_108000.d3


Epoch 109/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.66it/s, critic_loss=9.95e+4, actor_loss=-0.971, bc_loss=0.749]


2025-12-06 22:55.13 [info     ] ReBRAC_20251206214911: epoch=109 step=109000 epoch=109 metrics={'time_sample_batch': 0.019110958337783812, 'time_algorithm_update': 0.012752053499221802, 'critic_loss': 99584.364375, 'actor_loss': -0.9715131101608276, 'bc_loss': 0.7500294493436813, 'time_step': 0.032115763425827025, 'td_error': 336751.65725921653, 'value_scale': 6780.458974153978, 'discounted_advantage': -9977.490315354158, 'initial_state': 4046.900634765625, 'diff_eval': 71257.66520971082} step=109000
2025-12-06 22:55.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_109000.d3


Epoch 110/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.83it/s, critic_loss=1.14e+5, actor_loss=-0.974, bc_loss=0.963]


2025-12-06 22:55.49 [info     ] ReBRAC_20251206214911: epoch=110 step=110000 epoch=110 metrics={'time_sample_batch': 0.019168018102645873, 'time_algorithm_update': 0.01250024700164795, 'critic_loss': 114543.50787890625, 'actor_loss': -0.9741161890029907, 'bc_loss': 0.963860806107521, 'time_step': 0.031931562662124637, 'td_error': 339465.83845293487, 'value_scale': 7310.50511705086, 'discounted_advantage': -10386.633882208747, 'initial_state': 4549.46728515625, 'diff_eval': 63728.17362732392} step=110000
2025-12-06 22:55.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_110000.d3


Epoch 111/200: 100%|██████████| 1000/1000 [00:32<00:00, 31.15it/s, critic_loss=1.48e+5, actor_loss=-0.977, bc_loss=1.2]


2025-12-06 22:56.24 [info     ] ReBRAC_20251206214911: epoch=111 step=111000 epoch=111 metrics={'time_sample_batch': 0.018817692041397095, 'time_algorithm_update': 0.012565295934677125, 'critic_loss': 147858.1563359375, 'actor_loss': -0.976889463543892, 'bc_loss': 1.2044320244789124, 'time_step': 0.031632129192352296, 'td_error': 310854.2697542218, 'value_scale': 7753.507655450704, 'discounted_advantage': -10436.795347115789, 'initial_state': 5048.8955078125, 'diff_eval': 66587.24220792447} step=111000
2025-12-06 22:56.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_111000.d3


Epoch 112/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.56it/s, critic_loss=1.98e+5, actor_loss=-0.982, bc_loss=1.56]


2025-12-06 22:57.00 [info     ] ReBRAC_20251206214911: epoch=112 step=112000 epoch=112 metrics={'time_sample_batch': 0.019204420328140258, 'time_algorithm_update': 0.012712904691696166, 'critic_loss': 198286.59084375, 'actor_loss': -0.9822637823820114, 'bc_loss': 1.562528141975403, 'time_step': 0.032194226741790774, 'td_error': 327632.4555853057, 'value_scale': 8287.72106586438, 'discounted_advantage': -10955.94545764793, 'initial_state': 5747.1005859375, 'diff_eval': 66295.48036414501} step=112000
2025-12-06 22:57.01 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_112000.d3


Epoch 113/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.97it/s, critic_loss=2.72e+5, actor_loss=-0.985, bc_loss=1.48]


2025-12-06 22:57.36 [info     ] ReBRAC_20251206214911: epoch=113 step=113000 epoch=113 metrics={'time_sample_batch': 0.018905721187591554, 'time_algorithm_update': 0.01260813045501709, 'critic_loss': 272454.3449140625, 'actor_loss': -0.9847877798080444, 'bc_loss': 1.4754467356204986, 'time_step': 0.03177583980560303, 'td_error': 310462.7142962199, 'value_scale': 8408.063104623121, 'discounted_advantage': -10986.801644688203, 'initial_state': 5807.94189453125, 'diff_eval': 49020.06887035623} step=113000
2025-12-06 22:57.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_113000.d3


Epoch 114/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.93it/s, critic_loss=9.16e+5, actor_loss=-0.98, bc_loss=1.19]


2025-12-06 22:58.12 [info     ] ReBRAC_20251206214911: epoch=114 step=114000 epoch=114 metrics={'time_sample_batch': 0.01888906478881836, 'time_algorithm_update': 0.012657426834106445, 'critic_loss': 915354.44725, 'actor_loss': -0.9800718518495559, 'bc_loss': 1.1948226680755616, 'time_step': 0.031812618494033815, 'td_error': 412156.0391538004, 'value_scale': 8046.765931188611, 'discounted_advantage': -11101.18276703583, 'initial_state': 5026.0029296875, 'diff_eval': 41268.31262393468} step=114000
2025-12-06 22:58.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_114000.d3


Epoch 115/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.83it/s, critic_loss=1.05e+6, actor_loss=-0.981, bc_loss=1.32]


2025-12-06 22:58.48 [info     ] ReBRAC_20251206214911: epoch=115 step=115000 epoch=115 metrics={'time_sample_batch': 0.019000378370285034, 'time_algorithm_update': 0.012632007360458374, 'critic_loss': 1047112.17134375, 'actor_loss': -0.9812353030443192, 'bc_loss': 1.3185983698368073, 'time_step': 0.03189349532127381, 'td_error': 458979.2753184689, 'value_scale': 7813.929444938173, 'discounted_advantage': -10954.135090594116, 'initial_state': 4620.22119140625, 'diff_eval': 37855.26971840823} step=115000
2025-12-06 22:58.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_115000.d3


Epoch 116/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.55it/s, critic_loss=9.08e+5, actor_loss=-0.982, bc_loss=1.54]


2025-12-06 22:59.24 [info     ] ReBRAC_20251206214911: epoch=116 step=116000 epoch=116 metrics={'time_sample_batch': 0.019352517127990722, 'time_algorithm_update': 0.012627741575241089, 'critic_loss': 905516.06140625, 'actor_loss': -0.9817751512527466, 'bc_loss': 1.5368136274814606, 'time_step': 0.032232969522476194, 'td_error': 602636.2030669288, 'value_scale': 8138.280972207582, 'discounted_advantage': -12760.01187373229, 'initial_state': 4431.82666015625, 'diff_eval': 49500.60864521346} step=116000
2025-12-06 22:59.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_116000.d3


Epoch 117/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.72it/s, critic_loss=6.27e+5, actor_loss=-0.985, bc_loss=1.81]


2025-12-06 23:00.00 [info     ] ReBRAC_20251206214911: epoch=117 step=117000 epoch=117 metrics={'time_sample_batch': 0.019174286127090455, 'time_algorithm_update': 0.012621504545211793, 'critic_loss': 625896.154390625, 'actor_loss': -0.9853102148771286, 'bc_loss': 1.8154235196113586, 'time_step': 0.03205944967269898, 'td_error': 804870.4738477783, 'value_scale': 8782.082600158898, 'discounted_advantage': -14273.940924754856, 'initial_state': 4451.85546875, 'diff_eval': 61246.498504680916} step=117000
2025-12-06 23:00.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_117000.d3


Epoch 118/200: 100%|██████████| 1000/1000 [00:32<00:00, 31.01it/s, critic_loss=6.7e+5, actor_loss=-0.988, bc_loss=1.51]


2025-12-06 23:00.36 [info     ] ReBRAC_20251206214911: epoch=118 step=118000 epoch=118 metrics={'time_sample_batch': 0.018952495336532593, 'time_algorithm_update': 0.012539411544799805, 'critic_loss': 672499.291796875, 'actor_loss': -0.9884816004037857, 'bc_loss': 1.5079536817073822, 'time_step': 0.03175465202331543, 'td_error': 935144.1957692988, 'value_scale': 9521.93499313356, 'discounted_advantage': -15110.574475714076, 'initial_state': 4560.830078125, 'diff_eval': 40019.69253503777} step=118000
2025-12-06 23:00.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_118000.d3


Epoch 119/200: 100%|██████████| 1000/1000 [00:31<00:00, 31.31it/s, critic_loss=1.31e+6, actor_loss=-0.989, bc_loss=0.838]


2025-12-06 23:01.11 [info     ] ReBRAC_20251206214911: epoch=119 step=119000 epoch=119 metrics={'time_sample_batch': 0.01869410729408264, 'time_algorithm_update': 0.012439980506896972, 'critic_loss': 1310925.4330625, 'actor_loss': -0.9889259896278382, 'bc_loss': 0.8403246055841446, 'time_step': 0.03139552140235901, 'td_error': 764648.6731114879, 'value_scale': 9488.35326584147, 'discounted_advantage': -14323.368720714752, 'initial_state': 4804.82373046875, 'diff_eval': 27585.062690133687} step=119000
2025-12-06 23:01.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_119000.d3


Epoch 120/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.84it/s, critic_loss=1.56e+6, actor_loss=-0.989, bc_loss=1.18]


2025-12-06 23:01.47 [info     ] ReBRAC_20251206214911: epoch=120 step=120000 epoch=120 metrics={'time_sample_batch': 0.019024229764938356, 'time_algorithm_update': 0.01264038109779358, 'critic_loss': 1558885.3311875, 'actor_loss': -0.9889420430660247, 'bc_loss': 1.1751090687513353, 'time_step': 0.03192184829711914, 'td_error': 858036.946146368, 'value_scale': 9555.865063014195, 'discounted_advantage': -14748.712503909852, 'initial_state': 4791.373046875, 'diff_eval': 25598.393061133618} step=120000
2025-12-06 23:01.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_120000.d3


Epoch 121/200: 100%|██████████| 1000/1000 [00:32<00:00, 31.23it/s, critic_loss=1.63e+6, actor_loss=-0.988, bc_loss=1.37]


2025-12-06 23:02.23 [info     ] ReBRAC_20251206214911: epoch=121 step=121000 epoch=121 metrics={'time_sample_batch': 0.018704602241516113, 'time_algorithm_update': 0.01255662178993225, 'critic_loss': 1621141.82528125, 'actor_loss': -0.9877047121524811, 'bc_loss': 1.3704179974794388, 'time_step': 0.03150506782531738, 'td_error': 1431013.3593980896, 'value_scale': 10239.748562563394, 'discounted_advantage': -17410.952974018703, 'initial_state': 4806.37744140625, 'diff_eval': 75787.51611444952} step=121000
2025-12-06 23:02.23 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_121000.d3


Epoch 122/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.93it/s, critic_loss=4.73e+5, actor_loss=-0.993, bc_loss=1.24]


2025-12-06 23:02.58 [info     ] ReBRAC_20251206214911: epoch=122 step=122000 epoch=122 metrics={'time_sample_batch': 0.01877094054222107, 'time_algorithm_update': 0.012796293020248413, 'critic_loss': 473615.36228125, 'actor_loss': -0.9926873797178268, 'bc_loss': 1.2390458920001983, 'time_step': 0.03182146525382996, 'td_error': 1098173.1666783453, 'value_scale': 11504.246811850373, 'discounted_advantage': -17376.7339104603, 'initial_state': 5767.55126953125, 'diff_eval': 41790.730267071594} step=122000
2025-12-06 23:02.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_122000.d3


Epoch 123/200: 100%|██████████| 1000/1000 [00:33<00:00, 30.16it/s, critic_loss=2.44e+6, actor_loss=-0.993, bc_loss=0.537]


2025-12-06 23:03.35 [info     ] ReBRAC_20251206214911: epoch=123 step=123000 epoch=123 metrics={'time_sample_batch': 0.01944259238243103, 'time_algorithm_update': 0.01294791054725647, 'critic_loss': 2466472.62125, 'actor_loss': -0.993331148982048, 'bc_loss': 0.5351962706744671, 'time_step': 0.03266002249717712, 'td_error': 1124664.0560340742, 'value_scale': 10420.615192578005, 'discounted_advantage': -15577.235768737344, 'initial_state': 4512.77001953125, 'diff_eval': 12462.494176297969} step=123000
2025-12-06 23:03.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_123000.d3


Epoch 124/200: 100%|██████████| 1000/1000 [00:31<00:00, 31.29it/s, critic_loss=7.06e+6, actor_loss=-0.991, bc_loss=0.239]


2025-12-06 23:04.10 [info     ] ReBRAC_20251206214911: epoch=124 step=124000 epoch=124 metrics={'time_sample_batch': 0.018821151494979857, 'time_algorithm_update': 0.012400914669036865, 'critic_loss': 7078511.335, 'actor_loss': -0.9907300654649734, 'bc_loss': 0.23865820848941802, 'time_step': 0.03146866846084595, 'td_error': 1426773.536352625, 'value_scale': 9619.586094457365, 'discounted_advantage': -15609.36026815939, 'initial_state': 5462.90185546875, 'diff_eval': 20014.24905966815} step=124000
2025-12-06 23:04.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_124000.d3


Epoch 125/200: 100%|██████████| 1000/1000 [00:33<00:00, 30.18it/s, critic_loss=3.59e+6, actor_loss=-0.984, bc_loss=0.249]


2025-12-06 23:04.47 [info     ] ReBRAC_20251206214911: epoch=125 step=125000 epoch=125 metrics={'time_sample_batch': 0.01937863349914551, 'time_algorithm_update': 0.01297553849220276, 'critic_loss': 3586717.735125, 'actor_loss': -0.9836306960582734, 'bc_loss': 0.24904200866818427, 'time_step': 0.032620042324066165, 'td_error': 1287938.581574761, 'value_scale': 9428.912394298191, 'discounted_advantage': -13610.116589139921, 'initial_state': 5056.970703125, 'diff_eval': 22218.820823502403} step=125000
2025-12-06 23:04.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_125000.d3


Epoch 126/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.36it/s, critic_loss=2.37e+6, actor_loss=-0.985, bc_loss=0.334]


2025-12-06 23:05.23 [info     ] ReBRAC_20251206214911: epoch=126 step=126000 epoch=126 metrics={'time_sample_batch': 0.019279577016830446, 'time_algorithm_update': 0.012872814655303955, 'critic_loss': 2368953.4125, 'actor_loss': -0.9846859943866729, 'bc_loss': 0.33392828303575517, 'time_step': 0.032421915054321286, 'td_error': 1332678.4984168087, 'value_scale': 8948.755309596545, 'discounted_advantage': -13725.19398954758, 'initial_state': 5126.18017578125, 'diff_eval': 24307.523635072506} step=126000
2025-12-06 23:05.23 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_126000.d3


Epoch 127/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.33it/s, critic_loss=1.47e+6, actor_loss=-0.986, bc_loss=0.414]


2025-12-06 23:06.00 [info     ] ReBRAC_20251206214911: epoch=127 step=127000 epoch=127 metrics={'time_sample_batch': 0.01929271674156189, 'time_algorithm_update': 0.012897337198257447, 'critic_loss': 1467280.2633125, 'actor_loss': -0.9857126411199569, 'bc_loss': 0.41452899354696277, 'time_step': 0.03244759321212769, 'td_error': 1649530.261691619, 'value_scale': 8553.667801393347, 'discounted_advantage': -13947.233447520617, 'initial_state': 5158.1123046875, 'diff_eval': 22499.340923700493} step=127000
2025-12-06 23:06.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_127000.d3


Epoch 128/200: 100%|██████████| 1000/1000 [00:33<00:00, 30.30it/s, critic_loss=1.21e+6, actor_loss=-0.986, bc_loss=0.389]


2025-12-06 23:06.36 [info     ] ReBRAC_20251206214911: epoch=128 step=128000 epoch=128 metrics={'time_sample_batch': 0.019208091735839844, 'time_algorithm_update': 0.013033361673355103, 'critic_loss': 1215648.4125625, 'actor_loss': -0.9861478720903396, 'bc_loss': 0.38808251917362213, 'time_step': 0.03249013733863831, 'td_error': 1817134.9963947178, 'value_scale': 7933.431686732116, 'discounted_advantage': -13495.985664088452, 'initial_state': 4887.20166015625, 'diff_eval': 13342.154260087056} step=128000
2025-12-06 23:06.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_128000.d3


Epoch 129/200: 100%|██████████| 1000/1000 [00:32<00:00, 31.08it/s, critic_loss=1.82e+6, actor_loss=-0.987, bc_loss=0.367]


2025-12-06 23:07.12 [info     ] ReBRAC_20251206214911: epoch=129 step=129000 epoch=129 metrics={'time_sample_batch': 0.018586583375930787, 'time_algorithm_update': 0.012768271207809448, 'critic_loss': 1807054.37728125, 'actor_loss': -0.9870498124361038, 'bc_loss': 0.36770512753725054, 'time_step': 0.031628524780273434, 'td_error': 1545345.7169864296, 'value_scale': 7454.142850857868, 'discounted_advantage': -12482.906268218287, 'initial_state': 5034.8955078125, 'diff_eval': 13364.847671171934} step=129000
2025-12-06 23:07.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_129000.d3


Epoch 130/200: 100%|██████████| 1000/1000 [00:33<00:00, 29.48it/s, critic_loss=1.21e+6, actor_loss=-0.989, bc_loss=0.468]


2025-12-06 23:07.49 [info     ] ReBRAC_20251206214911: epoch=130 step=130000 epoch=130 metrics={'time_sample_batch': 0.020077763557434083, 'time_algorithm_update': 0.013004076004028321, 'critic_loss': 1204825.38434375, 'actor_loss': -0.9892981621026993, 'bc_loss': 0.46854951268434525, 'time_step': 0.03335817265510559, 'td_error': 1376681.2640469517, 'value_scale': 6892.844015347288, 'discounted_advantage': -11590.793677053396, 'initial_state': 4952.91552734375, 'diff_eval': 13258.537522075421} step=130000
2025-12-06 23:07.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_130000.d3


Epoch 131/200: 100%|██████████| 1000/1000 [00:34<00:00, 29.08it/s, critic_loss=8.56e+5, actor_loss=-0.992, bc_loss=0.496]


2025-12-06 23:08.27 [info     ] ReBRAC_20251206214911: epoch=131 step=131000 epoch=131 metrics={'time_sample_batch': 0.020478307723999025, 'time_algorithm_update': 0.013096868515014648, 'critic_loss': 854014.44696875, 'actor_loss': -0.9918504225015641, 'bc_loss': 0.4963271111249924, 'time_step': 0.03384586763381958, 'td_error': 1477501.8951371547, 'value_scale': 6382.459209897792, 'discounted_advantage': -10870.476509714214, 'initial_state': 5060.8037109375, 'diff_eval': 15564.643603743043} step=131000
2025-12-06 23:08.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_131000.d3


Epoch 132/200: 100%|██████████| 1000/1000 [00:33<00:00, 30.27it/s, critic_loss=5.88e+5, actor_loss=-0.994, bc_loss=0.56]


2025-12-06 23:09.04 [info     ] ReBRAC_20251206214911: epoch=132 step=132000 epoch=132 metrics={'time_sample_batch': 0.019254494667053222, 'time_algorithm_update': 0.013011233568191529, 'critic_loss': 586176.188203125, 'actor_loss': -0.9936427328586578, 'bc_loss': 0.5630770668387413, 'time_step': 0.0325158748626709, 'td_error': 1327482.7978842282, 'value_scale': 5955.578534724226, 'discounted_advantage': -11712.907519101944, 'initial_state': 4820.1435546875, 'diff_eval': 29586.090069014503} step=132000
2025-12-06 23:09.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_132000.d3


Epoch 133/200: 100%|██████████| 1000/1000 [00:32<00:00, 31.06it/s, critic_loss=2.65e+5, actor_loss=-0.995, bc_loss=1.05]


2025-12-06 23:09.39 [info     ] ReBRAC_20251206214911: epoch=133 step=133000 epoch=133 metrics={'time_sample_batch': 0.01885708427429199, 'time_algorithm_update': 0.01255531907081604, 'critic_loss': 265359.978625, 'actor_loss': -0.9951540423631668, 'bc_loss': 1.0504594875574111, 'time_step': 0.031685266733169556, 'td_error': 1213774.0180146012, 'value_scale': 5432.528511180062, 'discounted_advantage': -10225.033062299037, 'initial_state': 4560.0419921875, 'diff_eval': 28158.2911601015} step=133000
2025-12-06 23:09.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_133000.d3


Epoch 134/200: 100%|██████████| 1000/1000 [00:34<00:00, 29.20it/s, critic_loss=2.4e+5, actor_loss=-0.996, bc_loss=1.12]


2025-12-06 23:10.17 [info     ] ReBRAC_20251206214911: epoch=134 step=134000 epoch=134 metrics={'time_sample_batch': 0.019175398349761962, 'time_algorithm_update': 0.014272647857666016, 'critic_loss': 239400.591671875, 'actor_loss': -0.9958940072059631, 'bc_loss': 1.11785356259346, 'time_step': 0.03371881461143494, 'td_error': 1165310.3982883443, 'value_scale': 4844.2068322254445, 'discounted_advantage': -9094.664165519178, 'initial_state': 4195.12890625, 'diff_eval': 21430.843709509983} step=134000
2025-12-06 23:10.17 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_134000.d3


Epoch 135/200: 100%|██████████| 1000/1000 [00:32<00:00, 31.01it/s, critic_loss=2.28e+5, actor_loss=-0.996, bc_loss=1.25]


2025-12-06 23:10.53 [info     ] ReBRAC_20251206214911: epoch=135 step=135000 epoch=135 metrics={'time_sample_batch': 0.01889483952522278, 'time_algorithm_update': 0.01259390377998352, 'critic_loss': 227291.69028125, 'actor_loss': -0.9960021505355835, 'bc_loss': 1.2473704407215118, 'time_step': 0.03174828815460205, 'td_error': 1169432.459141091, 'value_scale': 4429.990036151436, 'discounted_advantage': -9130.460128102963, 'initial_state': 3755.34619140625, 'diff_eval': 20204.3955492415} step=135000
2025-12-06 23:10.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_135000.d3


Epoch 136/200: 100%|██████████| 1000/1000 [00:33<00:00, 30.25it/s, critic_loss=1.89e+5, actor_loss=-0.996, bc_loss=1.4]


2025-12-06 23:11.30 [info     ] ReBRAC_20251206214911: epoch=136 step=136000 epoch=136 metrics={'time_sample_batch': 0.019297220706939698, 'time_algorithm_update': 0.01294173240661621, 'critic_loss': 188541.779125, 'actor_loss': -0.9961951929330826, 'bc_loss': 1.400692174911499, 'time_step': 0.0325207622051239, 'td_error': 1159410.9984622379, 'value_scale': 4018.1713579086727, 'discounted_advantage': -8912.2202496425, 'initial_state': 3290.564453125, 'diff_eval': 18259.353526674044} step=136000
2025-12-06 23:11.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_136000.d3


Epoch 137/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.38it/s, critic_loss=1.83e+5, actor_loss=-0.996, bc_loss=1.43]


2025-12-06 23:12.06 [info     ] ReBRAC_20251206214911: epoch=137 step=137000 epoch=137 metrics={'time_sample_batch': 0.019454331398010255, 'time_algorithm_update': 0.012666953802108765, 'critic_loss': 183105.206703125, 'actor_loss': -0.9962555952072144, 'bc_loss': 1.427945708990097, 'time_step': 0.03237922239303589, 'td_error': 1191914.290590152, 'value_scale': 3727.770796453663, 'discounted_advantage': -9063.960920830254, 'initial_state': 2928.939453125, 'diff_eval': 16742.11147549527} step=137000
2025-12-06 23:12.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_137000.d3


Epoch 138/200: 100%|██████████| 1000/1000 [00:32<00:00, 31.05it/s, critic_loss=1.77e+5, actor_loss=-0.996, bc_loss=1.41]


2025-12-06 23:12.42 [info     ] ReBRAC_20251206214911: epoch=138 step=138000 epoch=138 metrics={'time_sample_batch': 0.018790745973587035, 'time_algorithm_update': 0.012576940536499024, 'critic_loss': 177190.2496171875, 'actor_loss': -0.995845660328865, 'bc_loss': 1.4126159965991973, 'time_step': 0.0316422324180603, 'td_error': 1237417.9809990588, 'value_scale': 3421.644780906428, 'discounted_advantage': -8714.455339757606, 'initial_state': 2552.708251953125, 'diff_eval': 15484.857968284792} step=138000
2025-12-06 23:12.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_138000.d3


Epoch 139/200: 100%|██████████| 1000/1000 [00:32<00:00, 31.23it/s, critic_loss=1.79e+5, actor_loss=-0.995, bc_loss=1.44]


2025-12-06 23:13.17 [info     ] ReBRAC_20251206214911: epoch=139 step=139000 epoch=139 metrics={'time_sample_batch': 0.01866877841949463, 'time_algorithm_update': 0.012555607080459595, 'critic_loss': 179167.850609375, 'actor_loss': -0.9954557628631592, 'bc_loss': 1.4407056679725647, 'time_step': 0.03150149917602539, 'td_error': 1285153.9409765892, 'value_scale': 3143.105759809223, 'discounted_advantage': -8876.518204352278, 'initial_state': 2231.7333984375, 'diff_eval': 12892.308669188478} step=139000
2025-12-06 23:13.17 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_139000.d3


Epoch 140/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.98it/s, critic_loss=1.74e+5, actor_loss=-0.995, bc_loss=1.49]


2025-12-06 23:13.53 [info     ] ReBRAC_20251206214911: epoch=140 step=140000 epoch=140 metrics={'time_sample_batch': 0.018969497203826905, 'time_algorithm_update': 0.012563498258590699, 'critic_loss': 174168.78190625, 'actor_loss': -0.9948578248023987, 'bc_loss': 1.485473911523819, 'time_step': 0.03179183387756348, 'td_error': 1335914.3926985986, 'value_scale': 2851.2117105449634, 'discounted_advantage': -9271.467773615084, 'initial_state': 1978.2532958984375, 'diff_eval': 11916.117987332398} step=140000
2025-12-06 23:13.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_140000.d3


Epoch 141/200: 100%|██████████| 1000/1000 [00:31<00:00, 31.55it/s, critic_loss=1.91e+5, actor_loss=-0.995, bc_loss=1.46]


2025-12-06 23:14.28 [info     ] ReBRAC_20251206214911: epoch=141 step=141000 epoch=141 metrics={'time_sample_batch': 0.018496933698654176, 'time_algorithm_update': 0.012438822269439697, 'critic_loss': 191592.5662421875, 'actor_loss': -0.9951819452047348, 'bc_loss': 1.4589497587680818, 'time_step': 0.031191288709640504, 'td_error': 1262779.2268821178, 'value_scale': 2581.103749415301, 'discounted_advantage': -9164.380890049842, 'initial_state': 1902.1351318359375, 'diff_eval': 13281.522878711} step=141000
2025-12-06 23:14.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_141000.d3


Epoch 142/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.59it/s, critic_loss=1.81e+5, actor_loss=-0.995, bc_loss=1.48]


2025-12-06 23:15.04 [info     ] ReBRAC_20251206214911: epoch=142 step=142000 epoch=142 metrics={'time_sample_batch': 0.0191587815284729, 'time_algorithm_update': 0.012767292976379395, 'critic_loss': 180539.7175234375, 'actor_loss': -0.9950515254735947, 'bc_loss': 1.4846076002120971, 'time_step': 0.03218637275695801, 'td_error': 1323827.6545439565, 'value_scale': 2296.080163091546, 'discounted_advantage': -9296.677046552693, 'initial_state': 1624.7203369140625, 'diff_eval': 11919.033370153493} step=142000
2025-12-06 23:15.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_142000.d3


Epoch 143/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.96it/s, critic_loss=1.78e+5, actor_loss=-0.996, bc_loss=1.47]


2025-12-06 23:15.40 [info     ] ReBRAC_20251206214911: epoch=143 step=143000 epoch=143 metrics={'time_sample_batch': 0.018947648048400878, 'time_algorithm_update': 0.012594862222671508, 'critic_loss': 177492.630390625, 'actor_loss': -0.995544927239418, 'bc_loss': 1.470640816450119, 'time_step': 0.031797555923461916, 'td_error': 1436503.921193101, 'value_scale': 2087.66974724788, 'discounted_advantage': -9697.764606266564, 'initial_state': 1464.822509765625, 'diff_eval': 10856.684572451572} step=143000
2025-12-06 23:15.40 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_143000.d3


Epoch 144/200: 100%|██████████| 1000/1000 [00:33<00:00, 29.53it/s, critic_loss=1.68e+5, actor_loss=-0.996, bc_loss=1.42]


2025-12-06 23:16.17 [info     ] ReBRAC_20251206214911: epoch=144 step=144000 epoch=144 metrics={'time_sample_batch': 0.019894548177719115, 'time_algorithm_update': 0.013155580997467041, 'critic_loss': 167998.0534765625, 'actor_loss': -0.9959326753616333, 'bc_loss': 1.4177472009658814, 'time_step': 0.03331593751907349, 'td_error': 1508922.5144936533, 'value_scale': 1914.5143004049798, 'discounted_advantage': -9926.036015877975, 'initial_state': 1286.0093994140625, 'diff_eval': 10183.839367241504} step=144000
2025-12-06 23:16.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_144000.d3


Epoch 145/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.77it/s, critic_loss=1.64e+5, actor_loss=-0.996, bc_loss=1.4]


2025-12-06 23:16.53 [info     ] ReBRAC_20251206214911: epoch=145 step=145000 epoch=145 metrics={'time_sample_batch': 0.01904509735107422, 'time_algorithm_update': 0.012709042549133302, 'critic_loss': 164315.95953125, 'actor_loss': -0.9956349060535431, 'bc_loss': 1.396847366809845, 'time_step': 0.032012948513031006, 'td_error': 1508450.5745949117, 'value_scale': 1768.133684594793, 'discounted_advantage': -9956.890583847284, 'initial_state': 1066.9134521484375, 'diff_eval': 9627.871894457614} step=145000
2025-12-06 23:16.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_145000.d3


Epoch 146/200: 100%|██████████| 1000/1000 [00:33<00:00, 29.45it/s, critic_loss=1.5e+5, actor_loss=-0.995, bc_loss=1.4] 


2025-12-06 23:17.31 [info     ] ReBRAC_20251206214911: epoch=146 step=146000 epoch=146 metrics={'time_sample_batch': 0.01986765265464783, 'time_algorithm_update': 0.013278628826141357, 'critic_loss': 149879.0184375, 'actor_loss': -0.9950629994869232, 'bc_loss': 1.3967303342819213, 'time_step': 0.03341147089004517, 'td_error': 1403469.1992619736, 'value_scale': 1633.8727539293125, 'discounted_advantage': -9700.786571450077, 'initial_state': 890.9717407226562, 'diff_eval': 9482.12915562102} step=146000
2025-12-06 23:17.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_146000.d3


Epoch 147/200: 100%|██████████| 1000/1000 [00:34<00:00, 29.40it/s, critic_loss=1.46e+5, actor_loss=-0.995, bc_loss=1.35]


2025-12-06 23:18.08 [info     ] ReBRAC_20251206214911: epoch=147 step=147000 epoch=147 metrics={'time_sample_batch': 0.020126484394073486, 'time_algorithm_update': 0.013007822036743165, 'critic_loss': 146242.312671875, 'actor_loss': -0.9945360544919968, 'bc_loss': 1.3499534780979157, 'time_step': 0.033426594495773315, 'td_error': 1387156.137498485, 'value_scale': 1544.2551406385114, 'discounted_advantage': -9617.695118545533, 'initial_state': 676.2762451171875, 'diff_eval': 9115.46542773655} step=147000
2025-12-06 23:18.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_147000.d3


Epoch 148/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.32it/s, critic_loss=1.4e+5, actor_loss=-0.994, bc_loss=1.29]


2025-12-06 23:18.44 [info     ] ReBRAC_20251206214911: epoch=148 step=148000 epoch=148 metrics={'time_sample_batch': 0.01938337779045105, 'time_algorithm_update': 0.012799857378005982, 'critic_loss': 140010.8249609375, 'actor_loss': -0.9939056521654129, 'bc_loss': 1.2943235898017884, 'time_step': 0.0324513738155365, 'td_error': 1265648.3546855836, 'value_scale': 1519.6858446927802, 'discounted_advantage': -9198.884056944127, 'initial_state': 612.3262939453125, 'diff_eval': 8958.70319886126} step=148000
2025-12-06 23:18.45 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_148000.d3


Epoch 149/200: 100%|██████████| 1000/1000 [00:33<00:00, 30.09it/s, critic_loss=1.55e+5, actor_loss=-0.993, bc_loss=1.26]


2025-12-06 23:19.22 [info     ] ReBRAC_20251206214911: epoch=149 step=149000 epoch=149 metrics={'time_sample_batch': 0.01950619077682495, 'time_algorithm_update': 0.01293871235847473, 'critic_loss': 155396.187, 'actor_loss': -0.9934108400344849, 'bc_loss': 1.263886248588562, 'time_step': 0.03271183156967163, 'td_error': 1295452.7083524442, 'value_scale': 1452.7600976595436, 'discounted_advantage': -9182.244236498966, 'initial_state': 420.98046875, 'diff_eval': 8875.469272287888} step=149000
2025-12-06 23:19.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_149000.d3


Epoch 150/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.89it/s, critic_loss=1.43e+5, actor_loss=-0.993, bc_loss=1.25]


2025-12-06 23:19.58 [info     ] ReBRAC_20251206214911: epoch=150 step=150000 epoch=150 metrics={'time_sample_batch': 0.019006386280059816, 'time_algorithm_update': 0.012589909791946411, 'critic_loss': 143169.2080078125, 'actor_loss': -0.9934054344892502, 'bc_loss': 1.2457205567359924, 'time_step': 0.031852270364761355, 'td_error': 1219927.1735191857, 'value_scale': 1423.1541388596393, 'discounted_advantage': -8967.429810318394, 'initial_state': 385.6158447265625, 'diff_eval': 8606.581132554413} step=150000
2025-12-06 23:19.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_150000.d3


Epoch 151/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.49it/s, critic_loss=1.54e+5, actor_loss=-0.994, bc_loss=1.24]


2025-12-06 23:20.34 [info     ] ReBRAC_20251206214911: epoch=151 step=151000 epoch=151 metrics={'time_sample_batch': 0.019405673027038575, 'time_algorithm_update': 0.01259702229499817, 'critic_loss': 153852.8409765625, 'actor_loss': -0.9935301163196564, 'bc_loss': 1.2356938786506653, 'time_step': 0.0322940137386322, 'td_error': 1258952.6836398258, 'value_scale': 1444.6656463957593, 'discounted_advantage': -9030.552065630964, 'initial_state': 406.2082824707031, 'diff_eval': 8642.280840692156} step=151000
2025-12-06 23:20.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_151000.d3


Epoch 152/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.33it/s, critic_loss=1.46e+5, actor_loss=-0.993, bc_loss=1.22]


2025-12-06 23:21.10 [info     ] ReBRAC_20251206214911: epoch=152 step=152000 epoch=152 metrics={'time_sample_batch': 0.01929663896560669, 'time_algorithm_update': 0.012890374660491944, 'critic_loss': 146923.370078125, 'actor_loss': -0.99338238966465, 'bc_loss': 1.2232745609283446, 'time_step': 0.03244933271408081, 'td_error': 1249179.9861396584, 'value_scale': 1448.9151316638365, 'discounted_advantage': -9018.609041324495, 'initial_state': 356.9272766113281, 'diff_eval': 8347.924553303386} step=152000
2025-12-06 23:21.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_152000.d3


Epoch 153/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.44it/s, critic_loss=1.79e+5, actor_loss=-0.993, bc_loss=1.16]


2025-12-06 23:21.47 [info     ] ReBRAC_20251206214911: epoch=153 step=153000 epoch=153 metrics={'time_sample_batch': 0.019226779699325562, 'time_algorithm_update': 0.012876849412918091, 'critic_loss': 180775.3856171875, 'actor_loss': -0.992978788614273, 'bc_loss': 1.1596209163665772, 'time_step': 0.03234826159477234, 'td_error': 1123709.9353468632, 'value_scale': 1337.0844855215603, 'discounted_advantage': -8322.620220815834, 'initial_state': 259.8983154296875, 'diff_eval': 8229.156562311933} step=153000
2025-12-06 23:21.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_153000.d3


Epoch 154/200: 100%|██████████| 1000/1000 [00:32<00:00, 31.07it/s, critic_loss=2.08e+5, actor_loss=-0.993, bc_loss=1.13]


2025-12-06 23:22.22 [info     ] ReBRAC_20251206214911: epoch=154 step=154000 epoch=154 metrics={'time_sample_batch': 0.01881381845474243, 'time_algorithm_update': 0.012620370864868164, 'critic_loss': 207210.3930546875, 'actor_loss': -0.993016352057457, 'bc_loss': 1.1282263462543487, 'time_step': 0.03168429684638977, 'td_error': 1122484.0111610135, 'value_scale': 1347.3284010532416, 'discounted_advantage': -8412.984482988259, 'initial_state': 223.1656494140625, 'diff_eval': 8085.70855986406} step=154000
2025-12-06 23:22.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_154000.d3


Epoch 155/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.85it/s, critic_loss=2e+5, actor_loss=-0.993, bc_loss=1.09]  


2025-12-06 23:22.58 [info     ] ReBRAC_20251206214911: epoch=155 step=155000 epoch=155 metrics={'time_sample_batch': 0.018933937549591065, 'time_algorithm_update': 0.012702792644500733, 'critic_loss': 199549.317875, 'actor_loss': -0.9927799825668335, 'bc_loss': 1.0896006941795349, 'time_step': 0.03190557551383972, 'td_error': 1065594.6622715276, 'value_scale': 1306.5153471030612, 'discounted_advantage': -8096.014007185724, 'initial_state': 138.24876403808594, 'diff_eval': 8236.609625667317} step=155000
2025-12-06 23:22.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_155000.d3


Epoch 156/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.35it/s, critic_loss=1.99e+5, actor_loss=-0.992, bc_loss=1.12]


2025-12-06 23:23.34 [info     ] ReBRAC_20251206214911: epoch=156 step=156000 epoch=156 metrics={'time_sample_batch': 0.019257970333099367, 'time_algorithm_update': 0.012878200769424438, 'critic_loss': 199572.5738984375, 'actor_loss': -0.9921481598615647, 'bc_loss': 1.1195990468263626, 'time_step': 0.03241591024398804, 'td_error': 942504.7251270969, 'value_scale': 1257.6632649133112, 'discounted_advantage': -7619.224813458278, 'initial_state': 234.8809051513672, 'diff_eval': 8259.530386525097} step=156000
2025-12-06 23:23.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_156000.d3


Epoch 157/200: 100%|██████████| 1000/1000 [00:33<00:00, 30.18it/s, critic_loss=1.92e+5, actor_loss=-0.992, bc_loss=1.18]


2025-12-06 23:24.11 [info     ] ReBRAC_20251206214911: epoch=157 step=157000 epoch=157 metrics={'time_sample_batch': 0.01948876166343689, 'time_algorithm_update': 0.012848443269729615, 'critic_loss': 191634.162609375, 'actor_loss': -0.9924331811666489, 'bc_loss': 1.1777856175899506, 'time_step': 0.03261799693107605, 'td_error': 1002795.8452164547, 'value_scale': 1234.850055873644, 'discounted_advantage': -7734.834961748547, 'initial_state': 195.02850341796875, 'diff_eval': 8157.744025354384} step=157000
2025-12-06 23:24.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_157000.d3


Epoch 158/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.36it/s, critic_loss=1.78e+5, actor_loss=-0.992, bc_loss=1.2]


2025-12-06 23:24.47 [info     ] ReBRAC_20251206214911: epoch=158 step=158000 epoch=158 metrics={'time_sample_batch': 0.019292351484298705, 'time_algorithm_update': 0.012855962991714478, 'critic_loss': 177510.046203125, 'actor_loss': -0.9923687853813171, 'bc_loss': 1.1943779735565185, 'time_step': 0.03241341257095337, 'td_error': 927536.902451313, 'value_scale': 1219.8274681843936, 'discounted_advantage': -7430.882945971784, 'initial_state': 211.57308959960938, 'diff_eval': 8169.966235974664} step=158000
2025-12-06 23:24.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_158000.d3


Epoch 159/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.82it/s, critic_loss=1.76e+5, actor_loss=-0.993, bc_loss=1.05]


2025-12-06 23:25.23 [info     ] ReBRAC_20251206214911: epoch=159 step=159000 epoch=159 metrics={'time_sample_batch': 0.019045628547668458, 'time_algorithm_update': 0.012629982948303222, 'critic_loss': 175458.6868359375, 'actor_loss': -0.9930082212686538, 'bc_loss': 1.0495465080738067, 'time_step': 0.031932417392730714, 'td_error': 850127.4767893109, 'value_scale': 1225.5121772039465, 'discounted_advantage': -7198.614144617881, 'initial_state': 319.63037109375, 'diff_eval': 8161.954961936343} step=159000
2025-12-06 23:25.23 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_159000.d3


Epoch 160/200: 100%|██████████| 1000/1000 [00:33<00:00, 30.16it/s, critic_loss=2.89e+5, actor_loss=-0.993, bc_loss=0.9] 


2025-12-06 23:26.01 [info     ] ReBRAC_20251206214911: epoch=160 step=160000 epoch=160 metrics={'time_sample_batch': 0.019044248819351198, 'time_algorithm_update': 0.01332154083251953, 'critic_loss': 288394.9790859375, 'actor_loss': -0.9930374834537506, 'bc_loss': 0.9001087492704392, 'time_step': 0.032635817289352415, 'td_error': 755183.6303547528, 'value_scale': 1204.055631684038, 'discounted_advantage': -6756.386016724853, 'initial_state': 363.46759033203125, 'diff_eval': 8117.5236287512325} step=160000
2025-12-06 23:26.01 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_160000.d3


Epoch 161/200: 100%|██████████| 1000/1000 [00:34<00:00, 28.76it/s, critic_loss=2.26e+5, actor_loss=-0.993, bc_loss=0.964]


2025-12-06 23:26.39 [info     ] ReBRAC_20251206214911: epoch=161 step=161000 epoch=161 metrics={'time_sample_batch': 0.02046728205680847, 'time_algorithm_update': 0.013437317848205567, 'critic_loss': 225869.0632421875, 'actor_loss': -0.9925052955150604, 'bc_loss': 0.9644002410173416, 'time_step': 0.03418330836296082, 'td_error': 566888.0802514246, 'value_scale': 1098.9176795143894, 'discounted_advantage': -5799.218959371135, 'initial_state': 361.8470764160156, 'diff_eval': 7855.951594820254} step=161000
2025-12-06 23:26.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_161000.d3


Epoch 162/200: 100%|██████████| 1000/1000 [00:33<00:00, 30.26it/s, critic_loss=1.87e+5, actor_loss=-0.992, bc_loss=1.05]


2025-12-06 23:27.16 [info     ] ReBRAC_20251206214911: epoch=162 step=162000 epoch=162 metrics={'time_sample_batch': 0.019244686126708984, 'time_algorithm_update': 0.013006738185882568, 'critic_loss': 186723.0357265625, 'actor_loss': -0.9923169826269149, 'bc_loss': 1.0521858500242234, 'time_step': 0.032527832984924315, 'td_error': 535680.5041154036, 'value_scale': 1064.160479203054, 'discounted_advantage': -5589.979989024857, 'initial_state': 342.5069274902344, 'diff_eval': 7748.156622633016} step=162000
2025-12-06 23:27.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_162000.d3


Epoch 163/200: 100%|██████████| 1000/1000 [00:33<00:00, 30.10it/s, critic_loss=1.79e+5, actor_loss=-0.992, bc_loss=1.06]


2025-12-06 23:27.52 [info     ] ReBRAC_20251206214911: epoch=163 step=163000 epoch=163 metrics={'time_sample_batch': 0.019660922765731812, 'time_algorithm_update': 0.012786044836044312, 'critic_loss': 178891.0228125, 'actor_loss': -0.9920005460977555, 'bc_loss': 1.05954723072052, 'time_step': 0.03270544672012329, 'td_error': 483490.2729828705, 'value_scale': 1012.8505093527759, 'discounted_advantage': -5270.579967061738, 'initial_state': 275.3096923828125, 'diff_eval': 7640.761397949499} step=163000
2025-12-06 23:27.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_163000.d3


Epoch 164/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.79it/s, critic_loss=1.66e+5, actor_loss=-0.992, bc_loss=1.06]


2025-12-06 23:28.28 [info     ] ReBRAC_20251206214911: epoch=164 step=164000 epoch=164 metrics={'time_sample_batch': 0.019126503467559813, 'time_algorithm_update': 0.012594318151473998, 'critic_loss': 166546.1112890625, 'actor_loss': -0.9916247729063034, 'bc_loss': 1.0580321902036667, 'time_step': 0.03198564672470093, 'td_error': 526827.585457323, 'value_scale': 1078.8228341202125, 'discounted_advantage': -5394.585952604784, 'initial_state': 329.5086364746094, 'diff_eval': 7741.654055981161} step=164000
2025-12-06 23:28.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_164000.d3


Epoch 165/200: 100%|██████████| 1000/1000 [00:33<00:00, 29.99it/s, critic_loss=1.62e+5, actor_loss=-0.991, bc_loss=1.07]


2025-12-06 23:29.05 [info     ] ReBRAC_20251206214911: epoch=165 step=165000 epoch=165 metrics={'time_sample_batch': 0.019489550113677978, 'time_algorithm_update': 0.013029527187347413, 'critic_loss': 161832.41734375, 'actor_loss': -0.9914884178638458, 'bc_loss': 1.0671861324310303, 'time_step': 0.03279005360603333, 'td_error': 474668.82264774054, 'value_scale': 1060.855374316258, 'discounted_advantage': -5109.199967380853, 'initial_state': 295.2667236328125, 'diff_eval': 7732.990778560869} step=165000
2025-12-06 23:29.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_165000.d3


Epoch 166/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.31it/s, critic_loss=1.65e+5, actor_loss=-0.992, bc_loss=1.06]


2025-12-06 23:29.41 [info     ] ReBRAC_20251206214911: epoch=166 step=166000 epoch=166 metrics={'time_sample_batch': 0.019383781671524047, 'time_algorithm_update': 0.012840512037277221, 'critic_loss': 164695.4067265625, 'actor_loss': -0.9918513752222061, 'bc_loss': 1.060293066740036, 'time_step': 0.03248944664001465, 'td_error': 467694.1248375458, 'value_scale': 1050.8968906238713, 'discounted_advantage': -4979.803106642125, 'initial_state': 267.66571044921875, 'diff_eval': 7723.255735527907} step=166000
2025-12-06 23:29.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_166000.d3


Epoch 167/200: 100%|██████████| 1000/1000 [00:32<00:00, 31.22it/s, critic_loss=1.66e+5, actor_loss=-0.992, bc_loss=1.05]


2025-12-06 23:30.17 [info     ] ReBRAC_20251206214911: epoch=167 step=167000 epoch=167 metrics={'time_sample_batch': 0.01877755618095398, 'time_algorithm_update': 0.012492284774780274, 'critic_loss': 165519.7291953125, 'actor_loss': -0.9921015918254852, 'bc_loss': 1.049084165930748, 'time_step': 0.0315260853767395, 'td_error': 528597.5655224179, 'value_scale': 1120.6731403423576, 'discounted_advantage': -5306.450398544841, 'initial_state': 324.5718688964844, 'diff_eval': 7867.550933427081} step=167000
2025-12-06 23:30.17 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_167000.d3


Epoch 168/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.89it/s, critic_loss=1.69e+5, actor_loss=-0.993, bc_loss=1.05]


2025-12-06 23:30.53 [info     ] ReBRAC_20251206214911: epoch=168 step=168000 epoch=168 metrics={'time_sample_batch': 0.019044909477233887, 'time_algorithm_update': 0.01256723141670227, 'critic_loss': 168726.25778125, 'actor_loss': -0.9926281877756119, 'bc_loss': 1.0525261965990067, 'time_step': 0.03187062263488769, 'td_error': 466524.7610729816, 'value_scale': 1058.9929972342454, 'discounted_advantage': -4997.888979364424, 'initial_state': 291.016357421875, 'diff_eval': 7811.968340778642} step=168000
2025-12-06 23:30.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_168000.d3


Epoch 169/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.65it/s, critic_loss=1.71e+5, actor_loss=-0.993, bc_loss=1.06]


2025-12-06 23:31.29 [info     ] ReBRAC_20251206214911: epoch=169 step=169000 epoch=169 metrics={'time_sample_batch': 0.019224783420562743, 'time_algorithm_update': 0.012641109466552735, 'critic_loss': 170737.82609375, 'actor_loss': -0.9930562957525253, 'bc_loss': 1.0561109153032302, 'time_step': 0.03211847257614136, 'td_error': 477184.1529544659, 'value_scale': 1107.2444817579203, 'discounted_advantage': -5079.662551518389, 'initial_state': 346.5335998535156, 'diff_eval': 7853.741040401529} step=169000
2025-12-06 23:31.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_169000.d3


Epoch 170/200: 100%|██████████| 1000/1000 [00:33<00:00, 30.20it/s, critic_loss=1.76e+5, actor_loss=-0.993, bc_loss=1.1]


2025-12-06 23:32.05 [info     ] ReBRAC_20251206214911: epoch=170 step=170000 epoch=170 metrics={'time_sample_batch': 0.01942519736289978, 'time_algorithm_update': 0.012927733898162841, 'critic_loss': 176239.151203125, 'actor_loss': -0.9933495240211487, 'bc_loss': 1.0977025274038315, 'time_step': 0.032601889371871945, 'td_error': 451796.60681136214, 'value_scale': 1154.8709822782228, 'discounted_advantage': -4944.363234916501, 'initial_state': 413.298095703125, 'diff_eval': 7894.997967590949} step=170000
2025-12-06 23:32.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_170000.d3


Epoch 171/200: 100%|██████████| 1000/1000 [00:32<00:00, 31.16it/s, critic_loss=1.92e+5, actor_loss=-0.994, bc_loss=1.15]


2025-12-06 23:32.41 [info     ] ReBRAC_20251206214911: epoch=171 step=171000 epoch=171 metrics={'time_sample_batch': 0.018652568101882934, 'time_algorithm_update': 0.012657345533370971, 'critic_loss': 191938.9298671875, 'actor_loss': -0.9938027676343918, 'bc_loss': 1.1458485133647918, 'time_step': 0.03157383751869201, 'td_error': 458072.7440419406, 'value_scale': 1137.992665781759, 'discounted_advantage': -4983.889454904094, 'initial_state': 394.0058288574219, 'diff_eval': 7798.690021108811} step=171000
2025-12-06 23:32.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_171000.d3


Epoch 172/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.52it/s, critic_loss=2.1e+5, actor_loss=-0.994, bc_loss=1.14]


2025-12-06 23:33.17 [info     ] ReBRAC_20251206214911: epoch=172 step=172000 epoch=172 metrics={'time_sample_batch': 0.019213446140289307, 'time_algorithm_update': 0.012716086149215699, 'critic_loss': 210264.7320234375, 'actor_loss': -0.9942139927148819, 'bc_loss': 1.1382132263183593, 'time_step': 0.032215253829956055, 'td_error': 471604.2666028193, 'value_scale': 1255.4657065900353, 'discounted_advantage': -5080.228054621516, 'initial_state': 560.8049926757812, 'diff_eval': 7790.226101206415} step=172000
2025-12-06 23:33.17 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_172000.d3


Epoch 173/200: 100%|██████████| 1000/1000 [00:32<00:00, 31.11it/s, critic_loss=2.34e+5, actor_loss=-0.995, bc_loss=1.13]


2025-12-06 23:33.52 [info     ] ReBRAC_20251206214911: epoch=173 step=173000 epoch=173 metrics={'time_sample_batch': 0.01881632423400879, 'time_algorithm_update': 0.012580161571502685, 'critic_loss': 234343.2598671875, 'actor_loss': -0.9948476622104645, 'bc_loss': 1.128000458598137, 'time_step': 0.03163870692253113, 'td_error': 421860.807767281, 'value_scale': 1257.4370570438605, 'discounted_advantage': -4846.528425127523, 'initial_state': 599.9793090820312, 'diff_eval': 8082.365170561873} step=173000
2025-12-06 23:33.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_173000.d3


Epoch 174/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.37it/s, critic_loss=2.68e+5, actor_loss=-0.995, bc_loss=1.11]


2025-12-06 23:34.29 [info     ] ReBRAC_20251206214911: epoch=174 step=174000 epoch=174 metrics={'time_sample_batch': 0.019139270305633546, 'time_algorithm_update': 0.013030443906784057, 'critic_loss': 267697.0982109375, 'actor_loss': -0.9951311868429183, 'bc_loss': 1.1128738822937012, 'time_step': 0.03242331147193909, 'td_error': 286778.34817218187, 'value_scale': 1117.6196984084634, 'discounted_advantage': -3957.7455195848675, 'initial_state': 578.6991577148438, 'diff_eval': 7969.700570305678} step=174000
2025-12-06 23:34.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_174000.d3


Epoch 175/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.38it/s, critic_loss=2.85e+5, actor_loss=-0.995, bc_loss=1.13]


2025-12-06 23:35.05 [info     ] ReBRAC_20251206214911: epoch=175 step=175000 epoch=175 metrics={'time_sample_batch': 0.01917633318901062, 'time_algorithm_update': 0.012829932928085328, 'critic_loss': 285089.5818203125, 'actor_loss': -0.9952257716655731, 'bc_loss': 1.1254260520935058, 'time_step': 0.03228062748908997, 'td_error': 296497.0161860172, 'value_scale': 1153.1549376602045, 'discounted_advantage': -3997.941724986473, 'initial_state': 621.6903686523438, 'diff_eval': 8217.54002999374} step=175000
2025-12-06 23:35.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_175000.d3


Epoch 176/200: 100%|██████████| 1000/1000 [00:33<00:00, 30.16it/s, critic_loss=3.09e+5, actor_loss=-0.996, bc_loss=1.18]


2025-12-06 23:35.42 [info     ] ReBRAC_20251206214911: epoch=176 step=176000 epoch=176 metrics={'time_sample_batch': 0.019711356639862062, 'time_algorithm_update': 0.012661489963531494, 'critic_loss': 310102.295953125, 'actor_loss': -0.9955375766754151, 'bc_loss': 1.1801669335365295, 'time_step': 0.03263097405433655, 'td_error': 304121.64514625614, 'value_scale': 1177.3496703236722, 'discounted_advantage': -4209.391561635006, 'initial_state': 655.95947265625, 'diff_eval': 9553.527803693894} step=176000
2025-12-06 23:35.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_176000.d3


Epoch 177/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.77it/s, critic_loss=3.74e+5, actor_loss=-0.996, bc_loss=1.23]


2025-12-06 23:36.20 [info     ] ReBRAC_20251206214911: epoch=177 step=177000 epoch=177 metrics={'time_sample_batch': 0.018893544912338255, 'time_algorithm_update': 0.012821279287338257, 'critic_loss': 374618.018640625, 'actor_loss': -0.9960386281013489, 'bc_loss': 1.2262575216293334, 'time_step': 0.03198528027534485, 'td_error': 328021.6693707569, 'value_scale': 1187.2923793399025, 'discounted_advantage': -4426.869370701288, 'initial_state': 670.47705078125, 'diff_eval': 9590.77451581906} step=177000
2025-12-06 23:36.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_177000.d3


Epoch 178/200: 100%|██████████| 1000/1000 [00:33<00:00, 29.46it/s, critic_loss=3.38e+5, actor_loss=-0.996, bc_loss=1.27]


2025-12-06 23:36.57 [info     ] ReBRAC_20251206214911: epoch=178 step=178000 epoch=178 metrics={'time_sample_batch': 0.020056507110595704, 'time_algorithm_update': 0.012806267023086548, 'critic_loss': 338303.09428125, 'actor_loss': -0.9961198242902756, 'bc_loss': 1.2693273067474364, 'time_step': 0.0331323549747467, 'td_error': 299620.59492314304, 'value_scale': 1178.6855147338572, 'discounted_advantage': -4283.372547134126, 'initial_state': 554.050048828125, 'diff_eval': 10482.5450880823} step=178000
2025-12-06 23:36.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_178000.d3


Epoch 179/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.71it/s, critic_loss=2.99e+5, actor_loss=-0.996, bc_loss=1.27]


2025-12-06 23:37.33 [info     ] ReBRAC_20251206214911: epoch=179 step=179000 epoch=179 metrics={'time_sample_batch': 0.019130178213119506, 'time_algorithm_update': 0.012646162271499634, 'critic_loss': 299383.752859375, 'actor_loss': -0.9956212766170501, 'bc_loss': 1.2695309319496155, 'time_step': 0.032042600154876706, 'td_error': 258578.20576629575, 'value_scale': 1229.589882953069, 'discounted_advantage': -4008.954423200022, 'initial_state': 648.5773315429688, 'diff_eval': 10982.012199119054} step=179000
2025-12-06 23:37.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_179000.d3


Epoch 180/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.63it/s, critic_loss=3.3e+5, actor_loss=-0.996, bc_loss=1.27]


2025-12-06 23:38.09 [info     ] ReBRAC_20251206214911: epoch=180 step=180000 epoch=180 metrics={'time_sample_batch': 0.01906488752365112, 'time_algorithm_update': 0.012820837259292603, 'critic_loss': 329724.3486875, 'actor_loss': -0.9956295330524445, 'bc_loss': 1.2656180019378662, 'time_step': 0.03213299798965454, 'td_error': 328551.89133375994, 'value_scale': 1384.5629434830385, 'discounted_advantage': -4327.83532352664, 'initial_state': 802.7405395507812, 'diff_eval': 11079.890839190846} step=180000
2025-12-06 23:38.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_180000.d3


Epoch 181/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.54it/s, critic_loss=4.34e+5, actor_loss=-0.996, bc_loss=1.23]


2025-12-06 23:38.45 [info     ] ReBRAC_20251206214911: epoch=181 step=181000 epoch=181 metrics={'time_sample_batch': 0.01910071611404419, 'time_algorithm_update': 0.012882715940475464, 'critic_loss': 435191.83021875, 'actor_loss': -0.9956469125747681, 'bc_loss': 1.2331556980609895, 'time_step': 0.03223900413513184, 'td_error': 279409.0963907274, 'value_scale': 1360.7019077550435, 'discounted_advantage': -4123.691793419612, 'initial_state': 822.2614135742188, 'diff_eval': 9551.372908523832} step=181000
2025-12-06 23:38.45 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_181000.d3


Epoch 182/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.40it/s, critic_loss=5.6e+5, actor_loss=-0.996, bc_loss=1.22]


2025-12-06 23:39.21 [info     ] ReBRAC_20251206214911: epoch=182 step=182000 epoch=182 metrics={'time_sample_batch': 0.019319278717041016, 'time_algorithm_update': 0.012804868936538696, 'critic_loss': 560316.878828125, 'actor_loss': -0.995847436785698, 'bc_loss': 1.2219884688854217, 'time_step': 0.032382413148880004, 'td_error': 302238.1135652624, 'value_scale': 1423.2947093832922, 'discounted_advantage': -4279.906868398275, 'initial_state': 907.4727172851562, 'diff_eval': 8915.449431450988} step=182000
2025-12-06 23:39.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_182000.d3


Epoch 183/200: 100%|██████████| 1000/1000 [00:32<00:00, 31.09it/s, critic_loss=6.03e+5, actor_loss=-0.996, bc_loss=1.15]


2025-12-06 23:39.57 [info     ] ReBRAC_20251206214911: epoch=183 step=183000 epoch=183 metrics={'time_sample_batch': 0.018886404752731324, 'time_algorithm_update': 0.012525013208389283, 'critic_loss': 605146.792875, 'actor_loss': -0.996398375749588, 'bc_loss': 1.1533484179973603, 'time_step': 0.031667855739593506, 'td_error': 272733.9066883303, 'value_scale': 1366.9310805808568, 'discounted_advantage': -4157.36866498034, 'initial_state': 954.7117309570312, 'diff_eval': 8403.506939524845} step=183000
2025-12-06 23:39.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_183000.d3


Epoch 184/200: 100%|██████████| 1000/1000 [00:33<00:00, 29.84it/s, critic_loss=5.93e+5, actor_loss=-0.997, bc_loss=1.07]


2025-12-06 23:40.34 [info     ] ReBRAC_20251206214911: epoch=184 step=184000 epoch=184 metrics={'time_sample_batch': 0.01972567582130432, 'time_algorithm_update': 0.013008729457855224, 'critic_loss': 592601.3776875, 'actor_loss': -0.9968578588962554, 'bc_loss': 1.0679336155653, 'time_step': 0.03299869155883789, 'td_error': 258796.16640537907, 'value_scale': 1280.4309788644564, 'discounted_advantage': -4028.176462087902, 'initial_state': 948.6388549804688, 'diff_eval': 8551.960437952263} step=184000
2025-12-06 23:40.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_184000.d3


Epoch 185/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.49it/s, critic_loss=6.22e+5, actor_loss=-0.997, bc_loss=1.05]


2025-12-06 23:41.10 [info     ] ReBRAC_20251206214911: epoch=185 step=185000 epoch=185 metrics={'time_sample_batch': 0.01923178577423096, 'time_algorithm_update': 0.012794460773468018, 'critic_loss': 622232.27478125, 'actor_loss': -0.996805190205574, 'bc_loss': 1.0491367639303208, 'time_step': 0.032291248798370364, 'td_error': 258467.98967840287, 'value_scale': 1238.7678832177444, 'discounted_advantage': -3990.889339950988, 'initial_state': 955.7406005859375, 'diff_eval': 8454.826102419504} step=185000
2025-12-06 23:41.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_185000.d3


Epoch 186/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.98it/s, critic_loss=6.68e+5, actor_loss=-0.997, bc_loss=1.08]


2025-12-06 23:41.46 [info     ] ReBRAC_20251206214911: epoch=186 step=186000 epoch=186 metrics={'time_sample_batch': 0.018881436347961425, 'time_algorithm_update': 0.012622874975204468, 'critic_loss': 668410.40275, 'actor_loss': -0.9966606743335724, 'bc_loss': 1.078549329161644, 'time_step': 0.031759515762329105, 'td_error': 278743.5756270813, 'value_scale': 1360.2735381230325, 'discounted_advantage': -4115.154158464317, 'initial_state': 1175.303955078125, 'diff_eval': 8591.396303741825} step=186000
2025-12-06 23:41.46 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_186000.d3


Epoch 187/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.80it/s, critic_loss=6.66e+5, actor_loss=-0.996, bc_loss=1.09]


2025-12-06 23:42.22 [info     ] ReBRAC_20251206214911: epoch=187 step=187000 epoch=187 metrics={'time_sample_batch': 0.018882545709609986, 'time_algorithm_update': 0.012853294610977173, 'critic_loss': 665078.1676875, 'actor_loss': -0.9964989361763, 'bc_loss': 1.0894611854553222, 'time_step': 0.03199389886856079, 'td_error': 255572.67399257023, 'value_scale': 1273.7694389486353, 'discounted_advantage': -3870.90035364143, 'initial_state': 1078.657958984375, 'diff_eval': 8783.535983115647} step=187000
2025-12-06 23:42.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_187000.d3


Epoch 188/200: 100%|██████████| 1000/1000 [00:33<00:00, 30.27it/s, critic_loss=6.25e+5, actor_loss=-0.996, bc_loss=1.11]


2025-12-06 23:42.58 [info     ] ReBRAC_20251206214911: epoch=188 step=188000 epoch=188 metrics={'time_sample_batch': 0.01937255358695984, 'time_algorithm_update': 0.012900532960891723, 'critic_loss': 624063.50996875, 'actor_loss': -0.9963761596679688, 'bc_loss': 1.1091570444107055, 'time_step': 0.03252320384979248, 'td_error': 239527.02767101204, 'value_scale': 1215.51150402729, 'discounted_advantage': -3648.3206080397513, 'initial_state': 1029.257080078125, 'diff_eval': 8475.818936023392} step=188000
2025-12-06 23:42.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_188000.d3


Epoch 189/200: 100%|██████████| 1000/1000 [00:34<00:00, 29.23it/s, critic_loss=5.59e+5, actor_loss=-0.996, bc_loss=1.1]


2025-12-06 23:43.36 [info     ] ReBRAC_20251206214911: epoch=189 step=189000 epoch=189 metrics={'time_sample_batch': 0.020757359981536865, 'time_algorithm_update': 0.01265990138053894, 'critic_loss': 560404.43584375, 'actor_loss': -0.9961424485445023, 'bc_loss': 1.09604254257679, 'time_step': 0.03367340612411499, 'td_error': 261860.8717206948, 'value_scale': 1177.7865241414243, 'discounted_advantage': -3769.4282451753097, 'initial_state': 998.5048217773438, 'diff_eval': 8765.089881841215} step=189000
2025-12-06 23:43.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_189000.d3


Epoch 190/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.96it/s, critic_loss=6.05e+5, actor_loss=-0.996, bc_loss=1.07]


2025-12-06 23:44.11 [info     ] ReBRAC_20251206214911: epoch=190 step=190000 epoch=190 metrics={'time_sample_batch': 0.018966508865356446, 'time_algorithm_update': 0.012571951866149903, 'critic_loss': 604912.80478125, 'actor_loss': -0.9959711775779724, 'bc_loss': 1.0650062314271926, 'time_step': 0.0317893795967102, 'td_error': 259136.65538497502, 'value_scale': 1161.6219039291068, 'discounted_advantage': -3652.1290871824995, 'initial_state': 984.58251953125, 'diff_eval': 8573.497963450347} step=190000
2025-12-06 23:44.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_190000.d3


Epoch 191/200: 100%|██████████| 1000/1000 [00:31<00:00, 31.32it/s, critic_loss=6.04e+5, actor_loss=-0.995, bc_loss=1.07]


2025-12-06 23:44.47 [info     ] ReBRAC_20251206214911: epoch=191 step=191000 epoch=191 metrics={'time_sample_batch': 0.01850131034851074, 'time_algorithm_update': 0.012628682851791383, 'critic_loss': 602916.88109375, 'actor_loss': -0.9954008764028549, 'bc_loss': 1.069072788119316, 'time_step': 0.03139011001586914, 'td_error': 294390.6140538543, 'value_scale': 1060.7582981158782, 'discounted_advantage': -3784.7688810678655, 'initial_state': 884.9590454101562, 'diff_eval': 8864.352052842047} step=191000
2025-12-06 23:44.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_191000.d3


Epoch 192/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.43it/s, critic_loss=5.73e+5, actor_loss=-0.995, bc_loss=1.05]


2025-12-06 23:45.23 [info     ] ReBRAC_20251206214911: epoch=192 step=192000 epoch=192 metrics={'time_sample_batch': 0.019033797025680543, 'time_algorithm_update': 0.013023063182830811, 'critic_loss': 573580.013875, 'actor_loss': -0.9946863918304444, 'bc_loss': 1.0522492905855179, 'time_step': 0.03233063554763794, 'td_error': 310855.35039868846, 'value_scale': 1071.5785274047964, 'discounted_advantage': -3651.2620048448903, 'initial_state': 904.2611694335938, 'diff_eval': 8513.76734076183} step=192000
2025-12-06 23:45.23 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_192000.d3


Epoch 193/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.82it/s, critic_loss=5.56e+5, actor_loss=-0.994, bc_loss=1.05]


2025-12-06 23:45.59 [info     ] ReBRAC_20251206214911: epoch=193 step=193000 epoch=193 metrics={'time_sample_batch': 0.01908355927467346, 'time_algorithm_update': 0.012584033489227296, 'critic_loss': 556092.32990625, 'actor_loss': -0.9934922853708267, 'bc_loss': 1.0480987166166305, 'time_step': 0.03193235182762146, 'td_error': 329208.036300122, 'value_scale': 1013.6475212860787, 'discounted_advantage': -3692.618281676095, 'initial_state': 803.030029296875, 'diff_eval': 9082.541236508834} step=193000
2025-12-06 23:45.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_193000.d3


Epoch 194/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.89it/s, critic_loss=5.17e+5, actor_loss=-0.992, bc_loss=1.05]


2025-12-06 23:46.35 [info     ] ReBRAC_20251206214911: epoch=194 step=194000 epoch=194 metrics={'time_sample_batch': 0.01902587819099426, 'time_algorithm_update': 0.012598762035369873, 'critic_loss': 516728.64809375, 'actor_loss': -0.99181072306633, 'bc_loss': 1.0543797566890716, 'time_step': 0.0318711256980896, 'td_error': 320153.7908737257, 'value_scale': 952.3605892107653, 'discounted_advantage': -3648.529462441693, 'initial_state': 848.27099609375, 'diff_eval': 9814.718143616232} step=194000
2025-12-06 23:46.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_194000.d3


Epoch 195/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.38it/s, critic_loss=5.4e+5, actor_loss=-0.992, bc_loss=1.03]


2025-12-06 23:47.11 [info     ] ReBRAC_20251206214911: epoch=195 step=195000 epoch=195 metrics={'time_sample_batch': 0.01929755234718323, 'time_algorithm_update': 0.012826297760009766, 'critic_loss': 540589.816, 'actor_loss': -0.9922801682949066, 'bc_loss': 1.0261452777385711, 'time_step': 0.03238737750053406, 'td_error': 328457.71950310236, 'value_scale': 890.733278446805, 'discounted_advantage': -3646.5053955692774, 'initial_state': 858.3387451171875, 'diff_eval': 10241.332809525053} step=195000
2025-12-06 23:47.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_195000.d3


Epoch 196/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.50it/s, critic_loss=5.01e+5, actor_loss=-0.992, bc_loss=1.03]


2025-12-06 23:47.47 [info     ] ReBRAC_20251206214911: epoch=196 step=196000 epoch=196 metrics={'time_sample_batch': 0.019147587299346923, 'time_algorithm_update': 0.01287924075126648, 'critic_loss': 501160.5515, 'actor_loss': -0.9923580250740052, 'bc_loss': 1.0312960289716722, 'time_step': 0.03228557920455933, 'td_error': 323852.98393495515, 'value_scale': 885.5770696507016, 'discounted_advantage': -3648.6698685955485, 'initial_state': 929.1648559570312, 'diff_eval': 10486.43698855253} step=196000
2025-12-06 23:47.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_196000.d3


Epoch 197/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.73it/s, critic_loss=4.72e+5, actor_loss=-0.992, bc_loss=1.01]


2025-12-06 23:48.23 [info     ] ReBRAC_20251206214911: epoch=197 step=197000 epoch=197 metrics={'time_sample_batch': 0.018996057271957398, 'time_algorithm_update': 0.01280120038986206, 'critic_loss': 471786.9880625, 'actor_loss': -0.9921023240089416, 'bc_loss': 1.0137463839054108, 'time_step': 0.03204834055900574, 'td_error': 326856.4281362892, 'value_scale': 871.6605815417809, 'discounted_advantage': -3667.09065795818, 'initial_state': 928.30810546875, 'diff_eval': 10321.400824712991} step=197000
2025-12-06 23:48.23 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_197000.d3


Epoch 198/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.70it/s, critic_loss=4.51e+5, actor_loss=-0.992, bc_loss=0.993]


2025-12-06 23:48.59 [info     ] ReBRAC_20251206214911: epoch=198 step=198000 epoch=198 metrics={'time_sample_batch': 0.01920135974884033, 'time_algorithm_update': 0.012603825092315673, 'critic_loss': 450455.6723125, 'actor_loss': -0.99199948990345, 'bc_loss': 0.9929561538696289, 'time_step': 0.032060123205184934, 'td_error': 292869.08182303014, 'value_scale': 766.858549966284, 'discounted_advantage': -3493.9700457043346, 'initial_state': 706.58251953125, 'diff_eval': 11658.86689631441} step=198000
2025-12-06 23:48.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_198000.d3


Epoch 199/200: 100%|██████████| 1000/1000 [00:33<00:00, 30.03it/s, critic_loss=4.14e+5, actor_loss=-0.992, bc_loss=0.983]


2025-12-06 23:49.36 [info     ] ReBRAC_20251206214911: epoch=199 step=199000 epoch=199 metrics={'time_sample_batch': 0.01954161310195923, 'time_algorithm_update': 0.012940081119537353, 'critic_loss': 413499.45334375, 'actor_loss': -0.9915758221149444, 'bc_loss': 0.9831491898298264, 'time_step': 0.0327492003440857, 'td_error': 273695.2315746008, 'value_scale': 812.5432733333321, 'discounted_advantage': -3360.8911662985506, 'initial_state': 760.8071899414062, 'diff_eval': 10882.779797526324} step=199000
2025-12-06 23:49.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_199000.d3


Epoch 200/200: 100%|██████████| 1000/1000 [00:32<00:00, 30.92it/s, critic_loss=3.8e+5, actor_loss=-0.991, bc_loss=0.976]


2025-12-06 23:50.12 [info     ] ReBRAC_20251206214911: epoch=200 step=200000 epoch=200 metrics={'time_sample_batch': 0.018725950479507446, 'time_algorithm_update': 0.012857919692993163, 'critic_loss': 380111.38765625, 'actor_loss': -0.991177176117897, 'bc_loss': 0.9759939533472061, 'time_step': 0.031834068775177, 'td_error': 271504.89642692887, 'value_scale': 860.6299538744765, 'discounted_advantage': -3377.798870177786, 'initial_state': 855.2223510742188, 'diff_eval': 11068.012319541684} step=200000
2025-12-06 23:50.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\ReBRAC_20251206214911\model_200000.d3
Training model:  IQL
2025-12-06 23:50.12 [info     ] dataset info                   dataset_info=DatasetInfo(observation_signature=Signature(dtype=[dtype('float64')], shape=[(7,)]), action_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.CONTINUOUS: 1>, action_size=1)
20

Epoch 1/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.29it/s, critic_loss=0.384, q_loss=0.381, v_loss=0.00338, actor_loss=-0.0954] 


2025-12-06 23:50.38 [info     ] IQL_20251206235012: epoch=1 step=1000 epoch=1 metrics={'time_sample_batch': 0.004825558423995972, 'time_algorithm_update': 0.017069228172302248, 'critic_loss': 0.387898919545114, 'q_loss': 0.3844328249692917, 'v_loss': 0.003466094514977158, 'actor_loss': -0.09667048839107156, 'time_step': 0.022154200077056884, 'td_error': 0.74151185757045, 'value_scale': 2.3320800315358223, 'discounted_advantage': -1.2583524416689282, 'initial_state': 2.6780569553375244, 'diff_eval': 2523.1340927422475} step=1000
2025-12-06 23:50.38 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_1000.d3


Epoch 2/200: 100%|██████████| 1000/1000 [00:23<00:00, 42.93it/s, critic_loss=1.24, q_loss=1.2, v_loss=0.0424, actor_loss=-0.3]    


2025-12-06 23:51.05 [info     ] IQL_20251206235012: epoch=2 step=2000 epoch=2 metrics={'time_sample_batch': 0.004902599096298218, 'time_algorithm_update': 0.017702624797821045, 'critic_loss': 1.2435275647044182, 'q_loss': 1.2008193908929825, 'v_loss': 0.04270817464683205, 'actor_loss': -0.3015852838642895, 'time_step': 0.02288973617553711, 'td_error': 1.3129441202189125, 'value_scale': 3.537698503610212, 'discounted_advantage': -3.0031638878098907, 'initial_state': 4.0007758140563965, 'diff_eval': 2284.246637018987} step=2000
2025-12-06 23:51.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_2000.d3


Epoch 3/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.22it/s, critic_loss=2.13, q_loss=2.02, v_loss=0.107, actor_loss=-0.486]


2025-12-06 23:51.31 [info     ] IQL_20251206235012: epoch=3 step=3000 epoch=3 metrics={'time_sample_batch': 0.0048015787601470945, 'time_algorithm_update': 0.01712712597846985, 'critic_loss': 2.1287717213630675, 'q_loss': 2.0221455677747726, 'v_loss': 0.1066261511668563, 'actor_loss': -0.4841122087612748, 'time_step': 0.02219436454772949, 'td_error': 1.512015098169189, 'value_scale': 4.241049186807447, 'discounted_advantage': -2.65945140886269, 'initial_state': 4.8922505378723145, 'diff_eval': 2717.807164787625} step=3000
2025-12-06 23:51.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_3000.d3


Epoch 4/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.45it/s, critic_loss=2.8, q_loss=2.66, v_loss=0.145, actor_loss=-0.463]


2025-12-06 23:51.57 [info     ] IQL_20251206235012: epoch=4 step=4000 epoch=4 metrics={'time_sample_batch': 0.004758553504943848, 'time_algorithm_update': 0.017075175046920778, 'critic_loss': 2.8042433104515077, 'q_loss': 2.6593066455125807, 'v_loss': 0.14493666587024928, 'actor_loss': -0.45765955539420244, 'time_step': 0.022103968620300292, 'td_error': 1.8781478713972473, 'value_scale': 4.681099055112048, 'discounted_advantage': -4.04556904271819, 'initial_state': 5.0150299072265625, 'diff_eval': 2427.414871384599} step=4000
2025-12-06 23:51.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_4000.d3


Epoch 5/200: 100%|██████████| 1000/1000 [00:22<00:00, 43.75it/s, critic_loss=3.19, q_loss=3.02, v_loss=0.171, actor_loss=-0.432]


2025-12-06 23:52.24 [info     ] IQL_20251206235012: epoch=5 step=5000 epoch=5 metrics={'time_sample_batch': 0.005217373132705688, 'time_algorithm_update': 0.016960031747817992, 'critic_loss': 3.1959459266662598, 'q_loss': 3.0246354674100875, 'v_loss': 0.1713104618191719, 'actor_loss': -0.43186243665777146, 'time_step': 0.022442222356796264, 'td_error': 2.005610776981736, 'value_scale': 4.806595417496819, 'discounted_advantage': -3.742226803902814, 'initial_state': 5.337522029876709, 'diff_eval': 2045.4793953460712} step=5000
2025-12-06 23:52.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_5000.d3


Epoch 6/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.10it/s, critic_loss=3.51, q_loss=3.33, v_loss=0.185, actor_loss=-0.479]


2025-12-06 23:52.50 [info     ] IQL_20251206235012: epoch=6 step=6000 epoch=6 metrics={'time_sample_batch': 0.004589320898056031, 'time_algorithm_update': 0.01743247127532959, 'critic_loss': 3.5171129183769225, 'q_loss': 3.3322036052942274, 'v_loss': 0.18490930807590483, 'actor_loss': -0.4801105426736176, 'time_step': 0.022261969804763793, 'td_error': 2.0877321087437384, 'value_scale': 4.977106883293229, 'discounted_advantage': -4.04030166589591, 'initial_state': 6.001929759979248, 'diff_eval': 2472.9922199265015} step=6000
2025-12-06 23:52.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_6000.d3


Epoch 7/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.75it/s, critic_loss=3.68, q_loss=3.49, v_loss=0.194, actor_loss=-0.523]


2025-12-06 23:53.16 [info     ] IQL_20251206235012: epoch=7 step=7000 epoch=7 metrics={'time_sample_batch': 0.0046916623115539555, 'time_algorithm_update': 0.01697909665107727, 'critic_loss': 3.6782485501766207, 'q_loss': 3.4849538602828978, 'v_loss': 0.1932946937903762, 'actor_loss': -0.5228228159248829, 'time_step': 0.021925753831863404, 'td_error': 2.253998180091207, 'value_scale': 5.054538866798554, 'discounted_advantage': -3.607546765233953, 'initial_state': 5.509340763092041, 'diff_eval': 1915.5368088082776} step=7000
2025-12-06 23:53.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_7000.d3


Epoch 8/200: 100%|██████████| 1000/1000 [00:23<00:00, 42.55it/s, critic_loss=3.76, q_loss=3.56, v_loss=0.201, actor_loss=-0.61]


2025-12-06 23:53.44 [info     ] IQL_20251206235012: epoch=8 step=8000 epoch=8 metrics={'time_sample_batch': 0.004969338178634644, 'time_algorithm_update': 0.017799832344055175, 'critic_loss': 3.7614633309841157, 'q_loss': 3.560352564692497, 'v_loss': 0.2011107726842165, 'actor_loss': -0.6112313473466784, 'time_step': 0.0230583918094635, 'td_error': 2.1472510331151495, 'value_scale': 5.081930096933738, 'discounted_advantage': -3.8335395037950875, 'initial_state': 6.456451416015625, 'diff_eval': 1932.011170096882} step=8000
2025-12-06 23:53.44 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_8000.d3


Epoch 9/200: 100%|██████████| 1000/1000 [00:23<00:00, 42.10it/s, critic_loss=3.88, q_loss=3.68, v_loss=0.196, actor_loss=-0.662]


2025-12-06 23:54.11 [info     ] IQL_20251206235012: epoch=9 step=9000 epoch=9 metrics={'time_sample_batch': 0.004861527681350708, 'time_algorithm_update': 0.01819782829284668, 'critic_loss': 3.882945558667183, 'q_loss': 3.6865540702342985, 'v_loss': 0.1963914856761694, 'actor_loss': -0.6588238519504667, 'time_step': 0.023325482845306396, 'td_error': 2.30534593694277, 'value_scale': 4.965490499888418, 'discounted_advantage': -4.596667162360468, 'initial_state': 5.5247392654418945, 'diff_eval': 2865.024806377251} step=9000
2025-12-06 23:54.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_9000.d3


Epoch 10/200: 100%|██████████| 1000/1000 [00:23<00:00, 43.44it/s, critic_loss=3.89, q_loss=3.7, v_loss=0.194, actor_loss=-0.704]


2025-12-06 23:54.38 [info     ] IQL_20251206235012: epoch=10 step=10000 epoch=10 metrics={'time_sample_batch': 0.004860747814178467, 'time_algorithm_update': 0.017516178131103517, 'critic_loss': 3.889872445344925, 'q_loss': 3.6954512329101563, 'v_loss': 0.19442121481895447, 'actor_loss': -0.70481676768139, 'time_step': 0.022635516166687013, 'td_error': 2.259800481229473, 'value_scale': 4.957072477547291, 'discounted_advantage': -3.5599187520294033, 'initial_state': 5.795928955078125, 'diff_eval': 1722.0562842325494} step=10000
2025-12-06 23:54.38 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_10000.d3


Epoch 11/200: 100%|██████████| 1000/1000 [00:22<00:00, 43.81it/s, critic_loss=3.96, q_loss=3.76, v_loss=0.198, actor_loss=-0.793]


2025-12-06 23:55.04 [info     ] IQL_20251206235012: epoch=11 step=11000 epoch=11 metrics={'time_sample_batch': 0.004701436281204224, 'time_algorithm_update': 0.017439916372299195, 'critic_loss': 3.9558271045684816, 'q_loss': 3.7579429315328596, 'v_loss': 0.1978841729015112, 'actor_loss': -0.7889002021104098, 'time_step': 0.02241110587120056, 'td_error': 2.2280357332111764, 'value_scale': 5.038861894921523, 'discounted_advantage': -3.47845363350523, 'initial_state': 6.126589298248291, 'diff_eval': 1561.976281290003} step=11000
2025-12-06 23:55.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_11000.d3


Epoch 12/200: 100%|██████████| 1000/1000 [00:22<00:00, 43.78it/s, critic_loss=4.01, q_loss=3.81, v_loss=0.198, actor_loss=-0.799]


2025-12-06 23:55.31 [info     ] IQL_20251206235012: epoch=12 step=12000 epoch=12 metrics={'time_sample_batch': 0.004716590642929077, 'time_algorithm_update': 0.017432520627975463, 'critic_loss': 4.013840692400932, 'q_loss': 3.8162467057704927, 'v_loss': 0.19759398274123668, 'actor_loss': -0.7980824798569083, 'time_step': 0.022425031900405883, 'td_error': 2.3525370428591397, 'value_scale': 4.845503852381827, 'discounted_advantage': -3.529408937251219, 'initial_state': 5.427201747894287, 'diff_eval': 1783.6900487917437} step=12000
2025-12-06 23:55.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_12000.d3


Epoch 13/200: 100%|██████████| 1000/1000 [00:22<00:00, 43.75it/s, critic_loss=4.01, q_loss=3.82, v_loss=0.196, actor_loss=-0.853]


2025-12-06 23:55.57 [info     ] IQL_20251206235012: epoch=13 step=13000 epoch=13 metrics={'time_sample_batch': 0.004757965087890625, 'time_algorithm_update': 0.017162420511245727, 'critic_loss': 4.011926252365113, 'q_loss': 3.8159626162052156, 'v_loss': 0.19596363966166974, 'actor_loss': -0.8519624745622277, 'time_step': 0.022190234899520874, 'td_error': 2.2634946171497723, 'value_scale': 4.899763449186136, 'discounted_advantage': -3.3666491615051126, 'initial_state': 5.746696949005127, 'diff_eval': 1567.5966686660245} step=13000
2025-12-06 23:55.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_13000.d3


Epoch 14/200: 100%|██████████| 1000/1000 [00:23<00:00, 43.22it/s, critic_loss=4.07, q_loss=3.87, v_loss=0.199, actor_loss=-0.908]


2025-12-06 23:56.23 [info     ] IQL_20251206235012: epoch=14 step=14000 epoch=14 metrics={'time_sample_batch': 0.004894711494445801, 'time_algorithm_update': 0.017526464462280275, 'critic_loss': 4.069324817419052, 'q_loss': 3.869698174715042, 'v_loss': 0.1996266466975212, 'actor_loss': -0.9084334413893521, 'time_step': 0.02270616388320923, 'td_error': 2.2479066765672067, 'value_scale': 4.548118084643181, 'discounted_advantage': -2.7322223015684823, 'initial_state': 5.53460693359375, 'diff_eval': 1483.8272216908583} step=14000
2025-12-06 23:56.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_14000.d3


Epoch 15/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.16it/s, critic_loss=4.14, q_loss=3.93, v_loss=0.204, actor_loss=-0.906]


2025-12-06 23:56.50 [info     ] IQL_20251206235012: epoch=15 step=15000 epoch=15 metrics={'time_sample_batch': 0.004745219230651855, 'time_algorithm_update': 0.017199416875839233, 'critic_loss': 4.134782885789871, 'q_loss': 3.9305476131439208, 'v_loss': 0.2042352741509676, 'actor_loss': -0.9073822242692113, 'time_step': 0.022202428102493284, 'td_error': 2.274698333586279, 'value_scale': 4.994134882001295, 'discounted_advantage': -3.542151270843954, 'initial_state': 6.379439830780029, 'diff_eval': 1443.5872525804407} step=15000
2025-12-06 23:56.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_15000.d3


Epoch 16/200: 100%|██████████| 1000/1000 [00:22<00:00, 43.70it/s, critic_loss=4.1, q_loss=3.89, v_loss=0.204, actor_loss=-0.974]


2025-12-06 23:57.16 [info     ] IQL_20251206235012: epoch=16 step=16000 epoch=16 metrics={'time_sample_batch': 0.004914687633514404, 'time_algorithm_update': 0.017306275844573976, 'critic_loss': 4.094947849750519, 'q_loss': 3.890632852077484, 'v_loss': 0.20431499993801117, 'actor_loss': -0.9744583161771297, 'time_step': 0.02248725342750549, 'td_error': 2.2896852470774545, 'value_scale': 4.8002902751722925, 'discounted_advantage': -3.263177386895353, 'initial_state': 6.307497501373291, 'diff_eval': 1347.330526954196} step=16000
2025-12-06 23:57.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_16000.d3


Epoch 17/200: 100%|██████████| 1000/1000 [00:22<00:00, 43.64it/s, critic_loss=4.2, q_loss=3.99, v_loss=0.207, actor_loss=-1.02] 


2025-12-06 23:57.43 [info     ] IQL_20251206235012: epoch=17 step=17000 epoch=17 metrics={'time_sample_batch': 0.004860262155532837, 'time_algorithm_update': 0.017342849016189574, 'critic_loss': 4.202165425896645, 'q_loss': 3.9955321863889695, 'v_loss': 0.2066332402974367, 'actor_loss': -1.0206906762570143, 'time_step': 0.022486064672470094, 'td_error': 2.604470244362482, 'value_scale': 4.807583062825232, 'discounted_advantage': -3.647341102863246, 'initial_state': 4.953214645385742, 'diff_eval': 1508.573271907371} step=17000
2025-12-06 23:57.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_17000.d3


Epoch 18/200: 100%|██████████| 1000/1000 [00:22<00:00, 43.86it/s, critic_loss=4.2, q_loss=3.99, v_loss=0.209, actor_loss=-1.08]


2025-12-06 23:58.09 [info     ] IQL_20251206235012: epoch=18 step=18000 epoch=18 metrics={'time_sample_batch': 0.004814796209335327, 'time_algorithm_update': 0.017294922828674317, 'critic_loss': 4.193577196002007, 'q_loss': 3.9844591224193575, 'v_loss': 0.20911807145923375, 'actor_loss': -1.0786534313037992, 'time_step': 0.02237441611289978, 'td_error': 2.4789404182662653, 'value_scale': 4.836093246791206, 'discounted_advantage': -3.8777942294759145, 'initial_state': 6.583184719085693, 'diff_eval': 1268.6077224342757} step=18000
2025-12-06 23:58.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_18000.d3


Epoch 19/200: 100%|██████████| 1000/1000 [00:22<00:00, 45.12it/s, critic_loss=4.22, q_loss=4.01, v_loss=0.208, actor_loss=-1.12]


2025-12-06 23:58.35 [info     ] IQL_20251206235012: epoch=19 step=19000 epoch=19 metrics={'time_sample_batch': 0.004635106563568115, 'time_algorithm_update': 0.01685828113555908, 'critic_loss': 4.210933210372925, 'q_loss': 4.002955069899559, 'v_loss': 0.20797813843935728, 'actor_loss': -1.1191140652298928, 'time_step': 0.021739936113357543, 'td_error': 2.427259393258212, 'value_scale': 5.029404212521625, 'discounted_advantage': -3.575587360949288, 'initial_state': 6.087953090667725, 'diff_eval': 1319.1460339158666} step=19000
2025-12-06 23:58.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_19000.d3


Epoch 20/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.28it/s, critic_loss=4.33, q_loss=4.11, v_loss=0.212, actor_loss=-1.18]


2025-12-06 23:59.01 [info     ] IQL_20251206235012: epoch=20 step=20000 epoch=20 metrics={'time_sample_batch': 0.004782450199127197, 'time_algorithm_update': 0.017110078096389772, 'critic_loss': 4.324294700860977, 'q_loss': 4.112656889081001, 'v_loss': 0.2116378119215369, 'actor_loss': -1.1811821402311324, 'time_step': 0.02216250801086426, 'td_error': 2.3318233211438955, 'value_scale': 5.005175999881761, 'discounted_advantage': -2.8449275764442237, 'initial_state': 6.758075714111328, 'diff_eval': 1186.3979957799415} step=20000
2025-12-06 23:59.01 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_20000.d3


Epoch 21/200: 100%|██████████| 1000/1000 [00:23<00:00, 42.30it/s, critic_loss=4.33, q_loss=4.12, v_loss=0.211, actor_loss=-1.19]


2025-12-06 23:59.28 [info     ] IQL_20251206235012: epoch=21 step=21000 epoch=21 metrics={'time_sample_batch': 0.005020228862762452, 'time_algorithm_update': 0.017903979778289794, 'critic_loss': 4.334697351694107, 'q_loss': 4.12319967842102, 'v_loss': 0.2114976681470871, 'actor_loss': -1.191201787829399, 'time_step': 0.023184091806411743, 'td_error': 2.5673851810860433, 'value_scale': 4.75710044107115, 'discounted_advantage': -3.757557232938887, 'initial_state': 6.004223823547363, 'diff_eval': 1631.3574676635944} step=21000
2025-12-06 23:59.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_21000.d3


Epoch 22/200: 100%|██████████| 1000/1000 [00:23<00:00, 42.80it/s, critic_loss=4.35, q_loss=4.14, v_loss=0.213, actor_loss=-1.23]


2025-12-06 23:59.55 [info     ] IQL_20251206235012: epoch=22 step=22000 epoch=22 metrics={'time_sample_batch': 0.004967738151550293, 'time_algorithm_update': 0.01773723316192627, 'critic_loss': 4.356323897004128, 'q_loss': 4.143678444027901, 'v_loss': 0.21264545249193906, 'actor_loss': -1.229487009525299, 'time_step': 0.022971468687057496, 'td_error': 2.504308009911577, 'value_scale': 4.746449706656741, 'discounted_advantage': -3.2900858210601074, 'initial_state': 5.403061389923096, 'diff_eval': 1218.0571337574784} step=22000
2025-12-06 23:59.55 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_22000.d3


Epoch 23/200: 100%|██████████| 1000/1000 [00:23<00:00, 42.34it/s, critic_loss=4.39, q_loss=4.18, v_loss=0.21, actor_loss=-1.24]


2025-12-07 00:00.22 [info     ] IQL_20251206235012: epoch=23 step=23000 epoch=23 metrics={'time_sample_batch': 0.005095487833023072, 'time_algorithm_update': 0.01780456066131592, 'critic_loss': 4.386538581490517, 'q_loss': 4.176011712789536, 'v_loss': 0.21052687688171864, 'actor_loss': -1.2459091287255286, 'time_step': 0.02318448305130005, 'td_error': 2.4207379012356105, 'value_scale': 4.883538691779951, 'discounted_advantage': -3.3830769863979233, 'initial_state': 6.167426586151123, 'diff_eval': 1182.154991101586} step=23000
2025-12-07 00:00.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_23000.d3


Epoch 24/200: 100%|██████████| 1000/1000 [00:23<00:00, 43.08it/s, critic_loss=4.48, q_loss=4.26, v_loss=0.213, actor_loss=-1.32]


2025-12-07 00:00.49 [info     ] IQL_20251206235012: epoch=24 step=24000 epoch=24 metrics={'time_sample_batch': 0.004949525117874145, 'time_algorithm_update': 0.017572723388671874, 'critic_loss': 4.482185370326042, 'q_loss': 4.268654621124267, 'v_loss': 0.21353075047582387, 'actor_loss': -1.3152335223704577, 'time_step': 0.022784168004989624, 'td_error': 2.59087763025912, 'value_scale': 4.8543955460706005, 'discounted_advantage': -3.477641487175884, 'initial_state': 6.002825736999512, 'diff_eval': 1199.213621995176} step=24000
2025-12-07 00:00.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_24000.d3


Epoch 25/200: 100%|██████████| 1000/1000 [00:23<00:00, 43.07it/s, critic_loss=4.39, q_loss=4.18, v_loss=0.21, actor_loss=-1.26]


2025-12-07 00:01.16 [info     ] IQL_20251206235012: epoch=25 step=25000 epoch=25 metrics={'time_sample_batch': 0.004972077846527099, 'time_algorithm_update': 0.01758103394508362, 'critic_loss': 4.390260740756989, 'q_loss': 4.180436117887497, 'v_loss': 0.20982462561130524, 'actor_loss': -1.2620418517291545, 'time_step': 0.022804697275161743, 'td_error': 2.502250041879611, 'value_scale': 4.894718041791434, 'discounted_advantage': -3.3379135356401157, 'initial_state': 5.617743968963623, 'diff_eval': 1059.3698735283497} step=25000
2025-12-07 00:01.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_25000.d3


Epoch 26/200: 100%|██████████| 1000/1000 [00:23<00:00, 42.52it/s, critic_loss=4.44, q_loss=4.23, v_loss=0.21, actor_loss=-1.3] 


2025-12-07 00:01.43 [info     ] IQL_20251206235012: epoch=26 step=26000 epoch=26 metrics={'time_sample_batch': 0.005118708372116089, 'time_algorithm_update': 0.01771114468574524, 'critic_loss': 4.443979580521583, 'q_loss': 4.234252856492996, 'v_loss': 0.20972672594338657, 'actor_loss': -1.2975346423387528, 'time_step': 0.023087347984313965, 'td_error': 2.6502592331961083, 'value_scale': 4.734025273608686, 'discounted_advantage': -4.0388256863746355, 'initial_state': 5.386614799499512, 'diff_eval': 1206.9729213692656} step=26000
2025-12-07 00:01.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_26000.d3


Epoch 27/200: 100%|██████████| 1000/1000 [00:23<00:00, 42.07it/s, critic_loss=4.38, q_loss=4.17, v_loss=0.21, actor_loss=-1.35]


2025-12-07 00:02.10 [info     ] IQL_20251206235012: epoch=27 step=27000 epoch=27 metrics={'time_sample_batch': 0.004994433403015136, 'time_algorithm_update': 0.018074589490890504, 'critic_loss': 4.385441423177719, 'q_loss': 4.17529601097107, 'v_loss': 0.21014541084319352, 'actor_loss': -1.3463777735084295, 'time_step': 0.023328346729278564, 'td_error': 2.425501205323571, 'value_scale': 4.932975167417953, 'discounted_advantage': -3.0856386362430452, 'initial_state': 5.701286792755127, 'diff_eval': 1077.382888755994} step=27000
2025-12-07 00:02.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_27000.d3


Epoch 28/200: 100%|██████████| 1000/1000 [00:22<00:00, 43.49it/s, critic_loss=4.42, q_loss=4.21, v_loss=0.21, actor_loss=-1.34]


2025-12-07 00:02.37 [info     ] IQL_20251206235012: epoch=28 step=28000 epoch=28 metrics={'time_sample_batch': 0.004917667388916015, 'time_algorithm_update': 0.017399935245513917, 'critic_loss': 4.414064181685448, 'q_loss': 4.203910309553146, 'v_loss': 0.21015386471152306, 'actor_loss': -1.3360226517915725, 'time_step': 0.02259298038482666, 'td_error': 2.347962532031938, 'value_scale': 4.861754349199079, 'discounted_advantage': -3.1598497223778548, 'initial_state': 6.114362716674805, 'diff_eval': 1050.1414095830337} step=28000
2025-12-07 00:02.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_28000.d3


Epoch 29/200: 100%|██████████| 1000/1000 [00:24<00:00, 40.47it/s, critic_loss=4.47, q_loss=4.26, v_loss=0.209, actor_loss=-1.38]


2025-12-07 00:03.05 [info     ] IQL_20251206235012: epoch=29 step=29000 epoch=29 metrics={'time_sample_batch': 0.005843181610107422, 'time_algorithm_update': 0.018178130865097047, 'critic_loss': 4.475106020689011, 'q_loss': 4.2657830940485, 'v_loss': 0.209322916328907, 'actor_loss': -1.3843393925651908, 'time_step': 0.024288243055343627, 'td_error': 2.4669881764969057, 'value_scale': 4.9034183773029465, 'discounted_advantage': -3.431527571237705, 'initial_state': 6.105117321014404, 'diff_eval': 979.2189914834038} step=29000
2025-12-07 00:03.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_29000.d3


Epoch 30/200: 100%|██████████| 1000/1000 [00:23<00:00, 42.23it/s, critic_loss=4.43, q_loss=4.22, v_loss=0.207, actor_loss=-1.41]


2025-12-07 00:03.33 [info     ] IQL_20251206235012: epoch=30 step=30000 epoch=30 metrics={'time_sample_batch': 0.005074343204498291, 'time_algorithm_update': 0.01795003867149353, 'critic_loss': 4.424412227630615, 'q_loss': 4.217287902474403, 'v_loss': 0.2071243247538805, 'actor_loss': -1.4115049237161874, 'time_step': 0.02331101393699646, 'td_error': 2.5405404868067945, 'value_scale': 5.145250306301574, 'discounted_advantage': -3.4518820672535284, 'initial_state': 6.334896087646484, 'diff_eval': 1092.6974949660844} step=30000
2025-12-07 00:03.33 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_30000.d3


Epoch 31/200: 100%|██████████| 1000/1000 [00:24<00:00, 41.59it/s, critic_loss=4.46, q_loss=4.25, v_loss=0.21, actor_loss=-1.46]


2025-12-07 00:04.00 [info     ] IQL_20251206235012: epoch=31 step=31000 epoch=31 metrics={'time_sample_batch': 0.005030474662780762, 'time_algorithm_update': 0.018325023412704467, 'critic_loss': 4.459327010989189, 'q_loss': 4.248777765154839, 'v_loss': 0.21054923562705516, 'actor_loss': -1.4581865740418434, 'time_step': 0.023630959272384643, 'td_error': 2.5264174969961584, 'value_scale': 5.044716785041254, 'discounted_advantage': -3.575073712568301, 'initial_state': 6.28682804107666, 'diff_eval': 1095.6941996396745} step=31000
2025-12-07 00:04.01 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_31000.d3


Epoch 32/200: 100%|██████████| 1000/1000 [00:23<00:00, 43.00it/s, critic_loss=4.54, q_loss=4.33, v_loss=0.205, actor_loss=-1.43]


2025-12-07 00:04.27 [info     ] IQL_20251206235012: epoch=32 step=32000 epoch=32 metrics={'time_sample_batch': 0.005028184652328491, 'time_algorithm_update': 0.017504801988601684, 'critic_loss': 4.537523699045181, 'q_loss': 4.33244393157959, 'v_loss': 0.20507976967841388, 'actor_loss': -1.4281804040521384, 'time_step': 0.02281126546859741, 'td_error': 2.4498760007787994, 'value_scale': 4.809375330039174, 'discounted_advantage': -2.929418205725123, 'initial_state': 5.631066799163818, 'diff_eval': 924.0490911208605} step=32000
2025-12-07 00:04.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_32000.d3


Epoch 33/200: 100%|██████████| 1000/1000 [00:24<00:00, 41.60it/s, critic_loss=4.47, q_loss=4.26, v_loss=0.206, actor_loss=-1.5]


2025-12-07 00:04.55 [info     ] IQL_20251206235012: epoch=33 step=33000 epoch=33 metrics={'time_sample_batch': 0.005142766237258911, 'time_algorithm_update': 0.01818991804122925, 'critic_loss': 4.4702463220357895, 'q_loss': 4.264289207100868, 'v_loss': 0.20595711822062732, 'actor_loss': -1.503754653930664, 'time_step': 0.023603381633758545, 'td_error': 2.342402953131835, 'value_scale': 4.919195478115085, 'discounted_advantage': -2.5648571166522283, 'initial_state': 6.252908229827881, 'diff_eval': 877.650859748512} step=33000
2025-12-07 00:04.55 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_33000.d3


Epoch 34/200: 100%|██████████| 1000/1000 [00:23<00:00, 43.36it/s, critic_loss=4.51, q_loss=4.3, v_loss=0.206, actor_loss=-1.5] 


2025-12-07 00:05.21 [info     ] IQL_20251206235012: epoch=34 step=34000 epoch=34 metrics={'time_sample_batch': 0.005009104967117309, 'time_algorithm_update': 0.017366796970367433, 'critic_loss': 4.515290699362755, 'q_loss': 4.309089144229889, 'v_loss': 0.20620156167447568, 'actor_loss': -1.503413419485092, 'time_step': 0.022648303985595704, 'td_error': 2.3245226389465303, 'value_scale': 4.90227359367967, 'discounted_advantage': -2.6022073666347443, 'initial_state': 6.36700439453125, 'diff_eval': 885.8260407940433} step=34000
2025-12-07 00:05.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_34000.d3


Epoch 35/200: 100%|██████████| 1000/1000 [00:22<00:00, 43.89it/s, critic_loss=4.48, q_loss=4.28, v_loss=0.203, actor_loss=-1.48]


2025-12-07 00:05.48 [info     ] IQL_20251206235012: epoch=35 step=35000 epoch=35 metrics={'time_sample_batch': 0.004828349351882934, 'time_algorithm_update': 0.017273814916610716, 'critic_loss': 4.47606028342247, 'q_loss': 4.2727844620943065, 'v_loss': 0.20327582390606402, 'actor_loss': -1.4819415961354971, 'time_step': 0.022372560262680054, 'td_error': 2.398975194688772, 'value_scale': 4.67749686434253, 'discounted_advantage': -3.028105818692511, 'initial_state': 5.955688953399658, 'diff_eval': 936.54621059267} step=35000
2025-12-07 00:05.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_35000.d3


Epoch 36/200: 100%|██████████| 1000/1000 [00:23<00:00, 42.70it/s, critic_loss=4.56, q_loss=4.36, v_loss=0.201, actor_loss=-1.52]


2025-12-07 00:06.15 [info     ] IQL_20251206235012: epoch=36 step=36000 epoch=36 metrics={'time_sample_batch': 0.005054570913314819, 'time_algorithm_update': 0.01765230345726013, 'critic_loss': 4.557400673866272, 'q_loss': 4.356392153978348, 'v_loss': 0.20100852248817683, 'actor_loss': -1.525147987768054, 'time_step': 0.022991503953933717, 'td_error': 2.568541018498245, 'value_scale': 4.87096089581051, 'discounted_advantage': -3.124604032313326, 'initial_state': 5.401994228363037, 'diff_eval': 986.5482332315604} step=36000
2025-12-07 00:06.15 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_36000.d3


Epoch 37/200: 100%|██████████| 1000/1000 [00:23<00:00, 41.69it/s, critic_loss=4.49, q_loss=4.29, v_loss=0.203, actor_loss=-1.52]


2025-12-07 00:06.42 [info     ] IQL_20251206235012: epoch=37 step=37000 epoch=37 metrics={'time_sample_batch': 0.005235361576080322, 'time_algorithm_update': 0.018062543392181395, 'critic_loss': 4.494239880084992, 'q_loss': 4.29074069070816, 'v_loss': 0.2034991893991828, 'actor_loss': -1.5213842184841633, 'time_step': 0.023569395065307617, 'td_error': 2.4568672420761883, 'value_scale': 5.251931225560814, 'discounted_advantage': -3.449870299289123, 'initial_state': 7.312438488006592, 'diff_eval': 882.6243747900006} step=37000
2025-12-07 00:06.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_37000.d3


Epoch 38/200: 100%|██████████| 1000/1000 [00:23<00:00, 42.25it/s, critic_loss=4.64, q_loss=4.43, v_loss=0.206, actor_loss=-1.55]


2025-12-07 00:07.09 [info     ] IQL_20251206235012: epoch=38 step=38000 epoch=38 metrics={'time_sample_batch': 0.005133296728134155, 'time_algorithm_update': 0.017823559522628783, 'critic_loss': 4.634671911716461, 'q_loss': 4.42830054461956, 'v_loss': 0.20637136621773242, 'actor_loss': -1.5543675988316537, 'time_step': 0.023239230155944823, 'td_error': 2.425585898184427, 'value_scale': 4.816239425809453, 'discounted_advantage': -3.1461774653826593, 'initial_state': 5.999504566192627, 'diff_eval': 806.0489306097734} step=38000
2025-12-07 00:07.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_38000.d3


Epoch 39/200: 100%|██████████| 1000/1000 [00:22<00:00, 43.60it/s, critic_loss=4.54, q_loss=4.34, v_loss=0.206, actor_loss=-1.62]


2025-12-07 00:07.36 [info     ] IQL_20251206235012: epoch=39 step=39000 epoch=39 metrics={'time_sample_batch': 0.004812591075897217, 'time_algorithm_update': 0.01740573453903198, 'critic_loss': 4.549382349014282, 'q_loss': 4.343132750630379, 'v_loss': 0.2062495991140604, 'actor_loss': -1.6150175349414349, 'time_step': 0.022495498418807984, 'td_error': 2.661329730212361, 'value_scale': 4.927185984364299, 'discounted_advantage': -3.035561831325771, 'initial_state': 5.395672798156738, 'diff_eval': 836.1285051078942} step=39000
2025-12-07 00:07.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_39000.d3


Epoch 40/200: 100%|██████████| 1000/1000 [00:23<00:00, 42.30it/s, critic_loss=4.61, q_loss=4.41, v_loss=0.202, actor_loss=-1.59]


2025-12-07 00:08.03 [info     ] IQL_20251206235012: epoch=40 step=40000 epoch=40 metrics={'time_sample_batch': 0.005092976331710816, 'time_algorithm_update': 0.017837101459503173, 'critic_loss': 4.613049741148949, 'q_loss': 4.411202906966209, 'v_loss': 0.2018468408510089, 'actor_loss': -1.5937617538571358, 'time_step': 0.023222509622573852, 'td_error': 2.606103710575874, 'value_scale': 4.627417210419252, 'discounted_advantage': -2.645423965291009, 'initial_state': 5.609420299530029, 'diff_eval': 850.2904322574462} step=40000
2025-12-07 00:08.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_40000.d3


Epoch 41/200: 100%|██████████| 1000/1000 [00:24<00:00, 41.12it/s, critic_loss=4.64, q_loss=4.44, v_loss=0.203, actor_loss=-1.61]


2025-12-07 00:08.31 [info     ] IQL_20251206235012: epoch=41 step=41000 epoch=41 metrics={'time_sample_batch': 0.005238027572631836, 'time_algorithm_update': 0.018337926387786866, 'critic_loss': 4.636279516339302, 'q_loss': 4.433797370672226, 'v_loss': 0.2024821430966258, 'actor_loss': -1.6066712672412395, 'time_step': 0.0238789803981781, 'td_error': 2.4945953728739734, 'value_scale': 4.732022746046497, 'discounted_advantage': -3.279488356455323, 'initial_state': 6.186054229736328, 'diff_eval': 824.1489886240661} step=41000
2025-12-07 00:08.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_41000.d3


Epoch 42/200: 100%|██████████| 1000/1000 [00:24<00:00, 40.04it/s, critic_loss=4.58, q_loss=4.37, v_loss=0.202, actor_loss=-1.62]


2025-12-07 00:09.00 [info     ] IQL_20251206235012: epoch=42 step=42000 epoch=42 metrics={'time_sample_batch': 0.005458906173706055, 'time_algorithm_update': 0.018734811305999757, 'critic_loss': 4.58763866186142, 'q_loss': 4.385551069021225, 'v_loss': 0.20208758947998284, 'actor_loss': -1.6098204091489314, 'time_step': 0.02450158953666687, 'td_error': 2.3121259470487887, 'value_scale': 4.872477726185693, 'discounted_advantage': -0.31568867608393464, 'initial_state': 5.701876163482666, 'diff_eval': 1710.9647139329986} step=42000
2025-12-07 00:09.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_42000.d3


Epoch 43/200: 100%|██████████| 1000/1000 [00:24<00:00, 40.37it/s, critic_loss=4.59, q_loss=4.39, v_loss=0.204, actor_loss=-1.69]


2025-12-07 00:09.29 [info     ] IQL_20251206235012: epoch=43 step=43000 epoch=43 metrics={'time_sample_batch': 0.005385465383529663, 'time_algorithm_update': 0.01862757921218872, 'critic_loss': 4.593843805551529, 'q_loss': 4.3894707840681075, 'v_loss': 0.20437302435189486, 'actor_loss': -1.6957878881916404, 'time_step': 0.024301189184188842, 'td_error': 2.7355704280097934, 'value_scale': 4.867891318167108, 'discounted_advantage': -3.0651457869822485, 'initial_state': 5.184060096740723, 'diff_eval': 745.3021144044246} step=43000
2025-12-07 00:09.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_43000.d3


Epoch 44/200: 100%|██████████| 1000/1000 [00:24<00:00, 41.08it/s, critic_loss=4.54, q_loss=4.35, v_loss=0.198, actor_loss=-1.67]


2025-12-07 00:09.57 [info     ] IQL_20251206235012: epoch=44 step=44000 epoch=44 metrics={'time_sample_batch': 0.005226924657821656, 'time_algorithm_update': 0.018374077320098877, 'critic_loss': 4.544831367492676, 'q_loss': 4.3468310049772265, 'v_loss': 0.1980003566145897, 'actor_loss': -1.6681882829666137, 'time_step': 0.023898086309432985, 'td_error': 2.6170684719506707, 'value_scale': 5.032583675049839, 'discounted_advantage': -2.985235293817373, 'initial_state': 6.167582988739014, 'diff_eval': 741.1825885350257} step=44000
2025-12-07 00:09.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_44000.d3


Epoch 45/200: 100%|██████████| 1000/1000 [00:23<00:00, 42.23it/s, critic_loss=4.53, q_loss=4.33, v_loss=0.198, actor_loss=-1.59]


2025-12-07 00:10.24 [info     ] IQL_20251206235012: epoch=45 step=45000 epoch=45 metrics={'time_sample_batch': 0.005058640480041504, 'time_algorithm_update': 0.01792166543006897, 'critic_loss': 4.5226762697696685, 'q_loss': 4.324905859231949, 'v_loss': 0.19777040985226632, 'actor_loss': -1.5917400923371314, 'time_step': 0.02327298974990845, 'td_error': 2.50925313493205, 'value_scale': 4.874427276611178, 'discounted_advantage': -2.855417724671333, 'initial_state': 5.859732627868652, 'diff_eval': 788.6741592892184} step=45000
2025-12-07 00:10.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_45000.d3


Epoch 46/200: 100%|██████████| 1000/1000 [00:24<00:00, 40.78it/s, critic_loss=4.56, q_loss=4.36, v_loss=0.197, actor_loss=-1.67]


2025-12-07 00:10.52 [info     ] IQL_20251206235012: epoch=46 step=46000 epoch=46 metrics={'time_sample_batch': 0.005315558910369873, 'time_algorithm_update': 0.018471964836120604, 'critic_loss': 4.553704600691796, 'q_loss': 4.3570188540220265, 'v_loss': 0.19668574500083924, 'actor_loss': -1.6679226611852647, 'time_step': 0.02408493995666504, 'td_error': 2.591894533573589, 'value_scale': 5.009271391992178, 'discounted_advantage': -3.2342423675277576, 'initial_state': 6.397274017333984, 'diff_eval': 743.3194272340913} step=46000
2025-12-07 00:10.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_46000.d3


Epoch 47/200: 100%|██████████| 1000/1000 [00:23<00:00, 43.04it/s, critic_loss=4.53, q_loss=4.34, v_loss=0.195, actor_loss=-1.7]


2025-12-07 00:11.19 [info     ] IQL_20251206235012: epoch=47 step=47000 epoch=47 metrics={'time_sample_batch': 0.0049728302955627446, 'time_algorithm_update': 0.01759400534629822, 'critic_loss': 4.527336499094963, 'q_loss': 4.3319638187885285, 'v_loss': 0.19537267816066742, 'actor_loss': -1.698419091850519, 'time_step': 0.022823883771896362, 'td_error': 2.5468166870303683, 'value_scale': 4.573783498545851, 'discounted_advantage': -2.8902285155200067, 'initial_state': 5.720325946807861, 'diff_eval': 746.5352873992724} step=47000
2025-12-07 00:11.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_47000.d3


Epoch 48/200: 100%|██████████| 1000/1000 [00:23<00:00, 42.50it/s, critic_loss=4.59, q_loss=4.39, v_loss=0.195, actor_loss=-1.66]


2025-12-07 00:11.46 [info     ] IQL_20251206235012: epoch=48 step=48000 epoch=48 metrics={'time_sample_batch': 0.005056495428085327, 'time_algorithm_update': 0.017766680955886842, 'critic_loss': 4.585293777585029, 'q_loss': 4.390176913619041, 'v_loss': 0.19511686814576387, 'actor_loss': -1.6557208000272512, 'time_step': 0.023096855401992796, 'td_error': 2.5374935477021046, 'value_scale': 4.740094901359978, 'discounted_advantage': -2.662153328140819, 'initial_state': 6.072160720825195, 'diff_eval': 760.037189743858} step=48000
2025-12-07 00:11.46 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_48000.d3


Epoch 49/200: 100%|██████████| 1000/1000 [00:24<00:00, 41.02it/s, critic_loss=4.54, q_loss=4.34, v_loss=0.195, actor_loss=-1.74]


2025-12-07 00:12.14 [info     ] IQL_20251206235012: epoch=49 step=49000 epoch=49 metrics={'time_sample_batch': 0.005181398630142212, 'time_algorithm_update': 0.01845660138130188, 'critic_loss': 4.5340877815485, 'q_loss': 4.339417323112488, 'v_loss': 0.19467045370489358, 'actor_loss': -1.7400821622610092, 'time_step': 0.023923404216766357, 'td_error': 2.5882918743761447, 'value_scale': 4.778958408911428, 'discounted_advantage': -3.0827261510966513, 'initial_state': 5.950543403625488, 'diff_eval': 723.9617476851151} step=49000
2025-12-07 00:12.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_49000.d3


Epoch 50/200: 100%|██████████| 1000/1000 [00:23<00:00, 42.28it/s, critic_loss=4.61, q_loss=4.41, v_loss=0.2, actor_loss=-1.73] 


2025-12-07 00:12.41 [info     ] IQL_20251206235012: epoch=50 step=50000 epoch=50 metrics={'time_sample_batch': 0.005104457378387451, 'time_algorithm_update': 0.017843128442764283, 'critic_loss': 4.604271532058716, 'q_loss': 4.404734906435013, 'v_loss': 0.19953662644326686, 'actor_loss': -1.7275859831124545, 'time_step': 0.023229790449142456, 'td_error': 2.684565774394841, 'value_scale': 4.863571716711387, 'discounted_advantage': -3.7561948004099306, 'initial_state': 6.367734432220459, 'diff_eval': 970.406224947709} step=50000
2025-12-07 00:12.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_50000.d3


Epoch 51/200: 100%|██████████| 1000/1000 [00:24<00:00, 41.50it/s, critic_loss=4.59, q_loss=4.39, v_loss=0.197, actor_loss=-1.76]


2025-12-07 00:13.09 [info     ] IQL_20251206235012: epoch=51 step=51000 epoch=51 metrics={'time_sample_batch': 0.005182244300842285, 'time_algorithm_update': 0.018155986070632936, 'critic_loss': 4.583188472032547, 'q_loss': 4.3860210049152375, 'v_loss': 0.19716746513545513, 'actor_loss': -1.7622310723215342, 'time_step': 0.023634432554244994, 'td_error': 2.5780452834665035, 'value_scale': 4.684921236353172, 'discounted_advantage': -3.223847218458747, 'initial_state': 6.386837005615234, 'diff_eval': 770.7927275467316} step=51000
2025-12-07 00:13.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_51000.d3


Epoch 52/200: 100%|██████████| 1000/1000 [00:23<00:00, 42.85it/s, critic_loss=4.59, q_loss=4.4, v_loss=0.193, actor_loss=-1.75]


2025-12-07 00:13.36 [info     ] IQL_20251206235012: epoch=52 step=52000 epoch=52 metrics={'time_sample_batch': 0.004976097822189331, 'time_algorithm_update': 0.01761703896522522, 'critic_loss': 4.604002693414688, 'q_loss': 4.410493495583534, 'v_loss': 0.19350920456647874, 'actor_loss': -1.7485625356584786, 'time_step': 0.0228771333694458, 'td_error': 2.6832968293304806, 'value_scale': 4.773786093637641, 'discounted_advantage': -2.470823543144125, 'initial_state': 5.584283351898193, 'diff_eval': 752.4893177185608} step=52000
2025-12-07 00:13.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_52000.d3


Epoch 53/200: 100%|██████████| 1000/1000 [00:23<00:00, 42.71it/s, critic_loss=4.58, q_loss=4.38, v_loss=0.198, actor_loss=-1.78]


2025-12-07 00:14.03 [info     ] IQL_20251206235012: epoch=53 step=53000 epoch=53 metrics={'time_sample_batch': 0.004997053623199463, 'time_algorithm_update': 0.017692924976348876, 'critic_loss': 4.594594726681709, 'q_loss': 4.3966734088659285, 'v_loss': 0.1979213165268302, 'actor_loss': -1.7847489850521088, 'time_step': 0.022974511623382567, 'td_error': 2.490073067722377, 'value_scale': 4.935023181708741, 'discounted_advantage': -2.4235987752784567, 'initial_state': 6.290599346160889, 'diff_eval': 744.8309714717432} step=53000
2025-12-07 00:14.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_53000.d3


Epoch 54/200: 100%|██████████| 1000/1000 [00:22<00:00, 43.51it/s, critic_loss=4.58, q_loss=4.39, v_loss=0.196, actor_loss=-1.83]


2025-12-07 00:14.30 [info     ] IQL_20251206235012: epoch=54 step=54000 epoch=54 metrics={'time_sample_batch': 0.004946523904800415, 'time_algorithm_update': 0.01734659123420715, 'critic_loss': 4.57932534134388, 'q_loss': 4.383266564965248, 'v_loss': 0.19605877751857043, 'actor_loss': -1.8278432924002408, 'time_step': 0.022562718391418456, 'td_error': 2.528498196797427, 'value_scale': 4.81878697079718, 'discounted_advantage': -2.4935878311728845, 'initial_state': 6.104965686798096, 'diff_eval': 689.1212389274115} step=54000
2025-12-07 00:14.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_54000.d3


Epoch 55/200: 100%|██████████| 1000/1000 [00:23<00:00, 42.41it/s, critic_loss=4.48, q_loss=4.29, v_loss=0.192, actor_loss=-1.82]


2025-12-07 00:14.57 [info     ] IQL_20251206235012: epoch=55 step=55000 epoch=55 metrics={'time_sample_batch': 0.005135590553283691, 'time_algorithm_update': 0.017766837120056153, 'critic_loss': 4.476272940158844, 'q_loss': 4.284005098342895, 'v_loss': 0.19226784358918667, 'actor_loss': -1.8128690837025643, 'time_step': 0.023181277751922608, 'td_error': 2.4535443617848096, 'value_scale': 4.765075462819557, 'discounted_advantage': -2.5254955438876485, 'initial_state': 6.718976974487305, 'diff_eval': 789.5563683460064} step=55000
2025-12-07 00:14.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_55000.d3


Epoch 56/200: 100%|██████████| 1000/1000 [00:23<00:00, 42.54it/s, critic_loss=4.54, q_loss=4.35, v_loss=0.188, actor_loss=-1.75]


2025-12-07 00:15.24 [info     ] IQL_20251206235012: epoch=56 step=56000 epoch=56 metrics={'time_sample_batch': 0.005100247859954834, 'time_algorithm_update': 0.017710416793823243, 'critic_loss': 4.543520143151283, 'q_loss': 4.355577208042145, 'v_loss': 0.1879429337605834, 'actor_loss': -1.750964260533452, 'time_step': 0.023092106342315673, 'td_error': 2.7100643803467235, 'value_scale': 4.84692806993359, 'discounted_advantage': -3.3868895569988666, 'initial_state': 5.62287712097168, 'diff_eval': 723.5126837946655} step=56000
2025-12-07 00:15.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_56000.d3


Epoch 57/200: 100%|██████████| 1000/1000 [00:23<00:00, 43.16it/s, critic_loss=4.5, q_loss=4.31, v_loss=0.19, actor_loss=-1.79] 


2025-12-07 00:15.51 [info     ] IQL_20251206235012: epoch=57 step=57000 epoch=57 metrics={'time_sample_batch': 0.004957769870758057, 'time_algorithm_update': 0.01751309108734131, 'critic_loss': 4.501665793299675, 'q_loss': 4.311470993280411, 'v_loss': 0.19019480012357234, 'actor_loss': -1.7917408169955016, 'time_step': 0.02274638032913208, 'td_error': 2.3369980202722638, 'value_scale': 4.806491689778371, 'discounted_advantage': -2.388297276216995, 'initial_state': 7.162560939788818, 'diff_eval': 771.7211494129878} step=57000
2025-12-07 00:15.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_57000.d3


Epoch 58/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.07it/s, critic_loss=4.57, q_loss=4.38, v_loss=0.19, actor_loss=-1.76]


2025-12-07 00:16.17 [info     ] IQL_20251206235012: epoch=58 step=58000 epoch=58 metrics={'time_sample_batch': 0.004826937913894653, 'time_algorithm_update': 0.017184616088867188, 'critic_loss': 4.564772975683212, 'q_loss': 4.375367271780967, 'v_loss': 0.18940570433437826, 'actor_loss': -1.76501512542367, 'time_step': 0.022277875900268553, 'td_error': 2.5241666301066723, 'value_scale': 4.599578090025345, 'discounted_advantage': -2.6379132879183693, 'initial_state': 5.36299991607666, 'diff_eval': 638.0880672209365} step=58000
2025-12-07 00:16.17 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_58000.d3


Epoch 59/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.06it/s, critic_loss=4.41, q_loss=4.22, v_loss=0.189, actor_loss=-1.85]


2025-12-07 00:16.43 [info     ] IQL_20251206235012: epoch=59 step=59000 epoch=59 metrics={'time_sample_batch': 0.0048007769584655765, 'time_algorithm_update': 0.01721292304992676, 'critic_loss': 4.413844189405442, 'q_loss': 4.224644213676453, 'v_loss': 0.1891999767422676, 'actor_loss': -1.847529525578022, 'time_step': 0.02228056025505066, 'td_error': 2.565387952552683, 'value_scale': 4.87973607250131, 'discounted_advantage': -3.0130614836614265, 'initial_state': 7.200472354888916, 'diff_eval': 675.3104650087546} step=59000
2025-12-07 00:16.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_59000.d3


Epoch 60/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.42it/s, critic_loss=4.49, q_loss=4.3, v_loss=0.189, actor_loss=-1.78]


2025-12-07 00:17.09 [info     ] IQL_20251206235012: epoch=60 step=60000 epoch=60 metrics={'time_sample_batch': 0.004795857191085815, 'time_algorithm_update': 0.01703840684890747, 'critic_loss': 4.4908953865766525, 'q_loss': 4.301854517698288, 'v_loss': 0.1890408662110567, 'actor_loss': -1.7853741204291582, 'time_step': 0.022089046478271486, 'td_error': 2.5462177656122753, 'value_scale': 4.780991328077676, 'discounted_advantage': -2.6284793147545207, 'initial_state': 6.049612998962402, 'diff_eval': 605.4888887188931} step=60000
2025-12-07 00:17.10 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_60000.d3


Epoch 61/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.02it/s, critic_loss=4.51, q_loss=4.32, v_loss=0.188, actor_loss=-1.8]


2025-12-07 00:17.36 [info     ] IQL_20251206235012: epoch=61 step=61000 epoch=61 metrics={'time_sample_batch': 0.004879418849945068, 'time_algorithm_update': 0.017136672019958495, 'critic_loss': 4.5083262023925785, 'q_loss': 4.320403978466987, 'v_loss': 0.18792222730070351, 'actor_loss': -1.8033115330934524, 'time_step': 0.022287232160568236, 'td_error': 2.5250705700301626, 'value_scale': 4.654917135360055, 'discounted_advantage': -2.6882291141402135, 'initial_state': 6.338679313659668, 'diff_eval': 616.9347999487806} step=61000
2025-12-07 00:17.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_61000.d3


Epoch 62/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.13it/s, critic_loss=4.49, q_loss=4.3, v_loss=0.187, actor_loss=-1.89]


2025-12-07 00:18.02 [info     ] IQL_20251206235012: epoch=62 step=62000 epoch=62 metrics={'time_sample_batch': 0.0048492164611816406, 'time_algorithm_update': 0.017134753465652467, 'critic_loss': 4.49386060154438, 'q_loss': 4.306665347456932, 'v_loss': 0.1871952557116747, 'actor_loss': -1.8855428504645824, 'time_step': 0.022247360944747926, 'td_error': 2.49031988003853, 'value_scale': 4.614233428623866, 'discounted_advantage': -2.076283737657277, 'initial_state': 5.946156024932861, 'diff_eval': 624.887624271925} step=62000
2025-12-07 00:18.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_62000.d3


Epoch 63/200: 100%|██████████| 1000/1000 [00:22<00:00, 43.81it/s, critic_loss=4.51, q_loss=4.32, v_loss=0.191, actor_loss=-1.89]


2025-12-07 00:18.28 [info     ] IQL_20251206235012: epoch=63 step=63000 epoch=63 metrics={'time_sample_batch': 0.0048254656791687014, 'time_algorithm_update': 0.017307142972946166, 'critic_loss': 4.504926665663719, 'q_loss': 4.3140437725782395, 'v_loss': 0.19088288602232933, 'actor_loss': -1.888937534302473, 'time_step': 0.02241370439529419, 'td_error': 2.7829514951382546, 'value_scale': 4.797323077381569, 'discounted_advantage': -3.0531747725546654, 'initial_state': 5.169045448303223, 'diff_eval': 690.9914495451296} step=63000
2025-12-07 00:18.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_63000.d3


Epoch 64/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.18it/s, critic_loss=4.49, q_loss=4.3, v_loss=0.193, actor_loss=-1.9] 


2025-12-07 00:18.54 [info     ] IQL_20251206235012: epoch=64 step=64000 epoch=64 metrics={'time_sample_batch': 0.004845449686050415, 'time_algorithm_update': 0.01711607503890991, 'critic_loss': 4.491748353004455, 'q_loss': 4.2985158925056455, 'v_loss': 0.1932324563637376, 'actor_loss': -1.887525945201516, 'time_step': 0.022223681211471558, 'td_error': 2.342868613843855, 'value_scale': 4.64553475630066, 'discounted_advantage': -1.1936696785702476, 'initial_state': 6.062260627746582, 'diff_eval': 781.7135613935304} step=64000
2025-12-07 00:18.55 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_64000.d3


Epoch 65/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.74it/s, critic_loss=4.51, q_loss=4.31, v_loss=0.192, actor_loss=-1.9]


2025-12-07 00:19.20 [info     ] IQL_20251206235012: epoch=65 step=65000 epoch=65 metrics={'time_sample_batch': 0.004735723972320556, 'time_algorithm_update': 0.01694154620170593, 'critic_loss': 4.5018544701337815, 'q_loss': 4.31023835337162, 'v_loss': 0.1916161197349429, 'actor_loss': -1.8962323158979415, 'time_step': 0.021948384761810304, 'td_error': 2.4854660934232076, 'value_scale': 4.735630045644939, 'discounted_advantage': -2.5595715487692763, 'initial_state': 6.332607269287109, 'diff_eval': 651.5757656649106} step=65000
2025-12-07 00:19.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_65000.d3


Epoch 66/200: 100%|██████████| 1000/1000 [00:24<00:00, 41.54it/s, critic_loss=4.51, q_loss=4.32, v_loss=0.188, actor_loss=-1.9]


2025-12-07 00:19.48 [info     ] IQL_20251206235012: epoch=66 step=66000 epoch=66 metrics={'time_sample_batch': 0.005300874948501587, 'time_algorithm_update': 0.017922298669815064, 'critic_loss': 4.505772916793823, 'q_loss': 4.317790822148323, 'v_loss': 0.18798209427297116, 'actor_loss': -1.9018529081046582, 'time_step': 0.023506627321243288, 'td_error': 2.46685931645185, 'value_scale': 4.7882719753180885, 'discounted_advantage': -2.41186170754632, 'initial_state': 6.149052619934082, 'diff_eval': 603.6246899589974} step=66000
2025-12-07 00:19.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_66000.d3


Epoch 67/200: 100%|██████████| 1000/1000 [00:23<00:00, 41.69it/s, critic_loss=4.49, q_loss=4.3, v_loss=0.189, actor_loss=-1.91]


2025-12-07 00:20.15 [info     ] IQL_20251206235012: epoch=67 step=67000 epoch=67 metrics={'time_sample_batch': 0.005177218675613404, 'time_algorithm_update': 0.018130990266799926, 'critic_loss': 4.490858564257622, 'q_loss': 4.301772931575775, 'v_loss': 0.18908563421666622, 'actor_loss': -1.913338522464037, 'time_step': 0.023570971727371215, 'td_error': 2.3373086239985597, 'value_scale': 4.813036594662444, 'discounted_advantage': -1.9821669804426671, 'initial_state': 6.288016319274902, 'diff_eval': 670.4940323992491} step=67000
2025-12-07 00:20.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_67000.d3


Epoch 68/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.64it/s, critic_loss=4.46, q_loss=4.27, v_loss=0.186, actor_loss=-1.82]


2025-12-07 00:20.41 [info     ] IQL_20251206235012: epoch=68 step=68000 epoch=68 metrics={'time_sample_batch': 0.004759943723678589, 'time_algorithm_update': 0.016983495473861694, 'critic_loss': 4.454724774122238, 'q_loss': 4.268404394984246, 'v_loss': 0.1863203848004341, 'actor_loss': -1.8205742498636246, 'time_step': 0.021994019269943238, 'td_error': 2.1834429679007377, 'value_scale': 4.510609190807423, 'discounted_advantage': -1.3076453930984389, 'initial_state': 6.832576274871826, 'diff_eval': 850.2543359382628} step=68000
2025-12-07 00:20.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_68000.d3


Epoch 69/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.54it/s, critic_loss=4.48, q_loss=4.29, v_loss=0.187, actor_loss=-1.95]


2025-12-07 00:21.07 [info     ] IQL_20251206235012: epoch=69 step=69000 epoch=69 metrics={'time_sample_batch': 0.004764254808425903, 'time_algorithm_update': 0.017026978015899658, 'critic_loss': 4.48071635723114, 'q_loss': 4.293861079335213, 'v_loss': 0.18685527932643892, 'actor_loss': -1.9478065867275, 'time_step': 0.0220494544506073, 'td_error': 2.6145766271413504, 'value_scale': 4.7073906919538775, 'discounted_advantage': -3.0952075893260953, 'initial_state': 5.7533488273620605, 'diff_eval': 594.910057377589} step=69000
2025-12-07 00:21.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_69000.d3


Epoch 70/200: 100%|██████████| 1000/1000 [00:23<00:00, 42.59it/s, critic_loss=4.53, q_loss=4.34, v_loss=0.19, actor_loss=-1.91]


2025-12-07 00:21.34 [info     ] IQL_20251206235012: epoch=70 step=70000 epoch=70 metrics={'time_sample_batch': 0.004951956272125244, 'time_algorithm_update': 0.017731350183486937, 'critic_loss': 4.5245542610883716, 'q_loss': 4.334026331424713, 'v_loss': 0.19052792863547802, 'actor_loss': -1.906872754663229, 'time_step': 0.022980091094970703, 'td_error': 2.7350902257164025, 'value_scale': 4.513944242355684, 'discounted_advantage': -3.2278342896566574, 'initial_state': 4.8607306480407715, 'diff_eval': 724.3419747285019} step=70000
2025-12-07 00:21.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_70000.d3


Epoch 71/200: 100%|██████████| 1000/1000 [00:23<00:00, 43.22it/s, critic_loss=4.41, q_loss=4.22, v_loss=0.189, actor_loss=-1.9]


2025-12-07 00:22.01 [info     ] IQL_20251206235012: epoch=71 step=71000 epoch=71 metrics={'time_sample_batch': 0.005026232481002807, 'time_algorithm_update': 0.017396690368652344, 'critic_loss': 4.416751194238663, 'q_loss': 4.228078663945198, 'v_loss': 0.18867253021895886, 'actor_loss': -1.903007232427597, 'time_step': 0.022692517280578615, 'td_error': 2.7921618614963775, 'value_scale': 4.735966609751108, 'discounted_advantage': -2.4235084865746184, 'initial_state': 5.189634799957275, 'diff_eval': 579.4616213920032} step=71000
2025-12-07 00:22.01 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_71000.d3


Epoch 72/200: 100%|██████████| 1000/1000 [00:22<00:00, 43.49it/s, critic_loss=4.5, q_loss=4.31, v_loss=0.187, actor_loss=-1.97]


2025-12-07 00:22.28 [info     ] IQL_20251206235012: epoch=72 step=72000 epoch=72 metrics={'time_sample_batch': 0.0048833882808685305, 'time_algorithm_update': 0.017429433822631835, 'critic_loss': 4.507968674302101, 'q_loss': 4.320772395968437, 'v_loss': 0.1871962763592601, 'actor_loss': -1.9655728188455104, 'time_step': 0.022569475173950197, 'td_error': 2.582174124103083, 'value_scale': 4.670839568425972, 'discounted_advantage': -2.7039720626525883, 'initial_state': 5.786674976348877, 'diff_eval': 593.9753966449844} step=72000
2025-12-07 00:22.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_72000.d3


Epoch 73/200: 100%|██████████| 1000/1000 [00:22<00:00, 43.75it/s, critic_loss=4.45, q_loss=4.26, v_loss=0.19, actor_loss=-2]   


2025-12-07 00:22.54 [info     ] IQL_20251206235012: epoch=73 step=73000 epoch=73 metrics={'time_sample_batch': 0.00488879132270813, 'time_algorithm_update': 0.017278821229934693, 'critic_loss': 4.446993669748307, 'q_loss': 4.257475537180901, 'v_loss': 0.1895181355178356, 'actor_loss': -1.999908150702715, 'time_step': 0.022431624174118044, 'td_error': 2.4326224445821976, 'value_scale': 4.89596422815622, 'discounted_advantage': -2.52669512803326, 'initial_state': 6.732803821563721, 'diff_eval': 586.6040804229417} step=73000
2025-12-07 00:22.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_73000.d3


Epoch 74/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.56it/s, critic_loss=4.48, q_loss=4.3, v_loss=0.183, actor_loss=-1.89]


2025-12-07 00:23.20 [info     ] IQL_20251206235012: epoch=74 step=74000 epoch=74 metrics={'time_sample_batch': 0.004790765523910522, 'time_algorithm_update': 0.017001869678497313, 'critic_loss': 4.478625954866409, 'q_loss': 4.295798100829124, 'v_loss': 0.18282785366475582, 'actor_loss': -1.89312632599473, 'time_step': 0.02204733633995056, 'td_error': 2.4492084692505385, 'value_scale': 4.5902899669177675, 'discounted_advantage': -2.691814841637578, 'initial_state': 6.286053657531738, 'diff_eval': 574.6364923440649} step=74000
2025-12-07 00:23.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_74000.d3


Epoch 75/200: 100%|██████████| 1000/1000 [00:22<00:00, 43.97it/s, critic_loss=4.48, q_loss=4.29, v_loss=0.184, actor_loss=-1.97]


2025-12-07 00:23.46 [info     ] IQL_20251206235012: epoch=75 step=75000 epoch=75 metrics={'time_sample_batch': 0.004891732215881348, 'time_algorithm_update': 0.017189566373825075, 'critic_loss': 4.477024252295494, 'q_loss': 4.29185914683342, 'v_loss': 0.18516510304808617, 'actor_loss': -1.9692979621738196, 'time_step': 0.022338369369506835, 'td_error': 2.500031579008469, 'value_scale': 4.541782560543612, 'discounted_advantage': -2.6554151514754394, 'initial_state': 5.857842922210693, 'diff_eval': 549.2615128170806} step=75000
2025-12-07 00:23.46 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_75000.d3


Epoch 76/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.05it/s, critic_loss=4.48, q_loss=4.3, v_loss=0.184, actor_loss=-1.96]


2025-12-07 00:24.12 [info     ] IQL_20251206235012: epoch=76 step=76000 epoch=76 metrics={'time_sample_batch': 0.004821323871612549, 'time_algorithm_update': 0.017215006589889528, 'critic_loss': 4.479509200692177, 'q_loss': 4.295601254463196, 'v_loss': 0.18390794693678617, 'actor_loss': -1.9558562395870687, 'time_step': 0.022299051761627196, 'td_error': 2.193825867958139, 'value_scale': 4.67820363636195, 'discounted_advantage': -1.636657075811325, 'initial_state': 6.415988922119141, 'diff_eval': 682.3615755272438} step=76000
2025-12-07 00:24.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_76000.d3


Epoch 77/200: 100%|██████████| 1000/1000 [00:22<00:00, 43.66it/s, critic_loss=4.38, q_loss=4.19, v_loss=0.186, actor_loss=-2.01]


2025-12-07 00:24.39 [info     ] IQL_20251206235012: epoch=77 step=77000 epoch=77 metrics={'time_sample_batch': 0.004904168367385864, 'time_algorithm_update': 0.01731204915046692, 'critic_loss': 4.382232842445373, 'q_loss': 4.196622041106224, 'v_loss': 0.18561079508811235, 'actor_loss': -2.0096404819115996, 'time_step': 0.022485972881317137, 'td_error': 2.5485386060417343, 'value_scale': 4.821188319825754, 'discounted_advantage': -2.55731624693852, 'initial_state': 5.793686389923096, 'diff_eval': 673.977777930287} step=77000
2025-12-07 00:24.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_77000.d3


Epoch 78/200: 100%|██████████| 1000/1000 [00:23<00:00, 42.78it/s, critic_loss=4.29, q_loss=4.11, v_loss=0.181, actor_loss=-1.93]


2025-12-07 00:25.06 [info     ] IQL_20251206235012: epoch=78 step=78000 epoch=78 metrics={'time_sample_batch': 0.00501671576499939, 'time_algorithm_update': 0.01763025212287903, 'critic_loss': 4.297377575039864, 'q_loss': 4.116650010943413, 'v_loss': 0.18072756231576204, 'actor_loss': -1.9324661917537451, 'time_step': 0.022934102535247802, 'td_error': 2.6347658288096456, 'value_scale': 4.703873865191036, 'discounted_advantage': -3.043507039867832, 'initial_state': 5.5630784034729, 'diff_eval': 678.4928381302935} step=78000
2025-12-07 00:25.06 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_78000.d3


Epoch 79/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.19it/s, critic_loss=4.4, q_loss=4.22, v_loss=0.183, actor_loss=-1.94]


2025-12-07 00:25.32 [info     ] IQL_20251206235012: epoch=79 step=79000 epoch=79 metrics={'time_sample_batch': 0.004823998212814331, 'time_algorithm_update': 0.01712271499633789, 'critic_loss': 4.401593980312348, 'q_loss': 4.218601879000664, 'v_loss': 0.18299209911376238, 'actor_loss': -1.9285161190479994, 'time_step': 0.02221619462966919, 'td_error': 2.5520386816400866, 'value_scale': 4.692359482537727, 'discounted_advantage': -2.031386159663365, 'initial_state': 5.776055335998535, 'diff_eval': 601.8739265667474} step=79000
2025-12-07 00:25.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_79000.d3


Epoch 80/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.29it/s, critic_loss=4.39, q_loss=4.21, v_loss=0.178, actor_loss=-2]  


2025-12-07 00:25.58 [info     ] IQL_20251206235012: epoch=80 step=80000 epoch=80 metrics={'time_sample_batch': 0.004802741765975952, 'time_algorithm_update': 0.01711124515533447, 'critic_loss': 4.381448179721832, 'q_loss': 4.203370040774345, 'v_loss': 0.17807814142107964, 'actor_loss': -1.996739849895239, 'time_step': 0.022177964448928833, 'td_error': 2.375209988098938, 'value_scale': 4.785787526169154, 'discounted_advantage': -2.3246579997095735, 'initial_state': 6.4859395027160645, 'diff_eval': 543.8627266190014} step=80000
2025-12-07 00:25.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_80000.d3


Epoch 81/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.43it/s, critic_loss=4.35, q_loss=4.16, v_loss=0.183, actor_loss=-1.98]


2025-12-07 00:26.24 [info     ] IQL_20251206235012: epoch=81 step=81000 epoch=81 metrics={'time_sample_batch': 0.004795340776443481, 'time_algorithm_update': 0.017048876285552978, 'critic_loss': 4.344480124592781, 'q_loss': 4.161148496985436, 'v_loss': 0.18333162339031697, 'actor_loss': -1.9779615642428399, 'time_step': 0.022095394134521485, 'td_error': 2.570818955275012, 'value_scale': 4.858434932231466, 'discounted_advantage': -3.2929578083291187, 'initial_state': 6.695358753204346, 'diff_eval': 610.2004627137575} step=81000
2025-12-07 00:26.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_81000.d3


Epoch 82/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.33it/s, critic_loss=4.33, q_loss=4.15, v_loss=0.181, actor_loss=-1.96]


2025-12-07 00:26.50 [info     ] IQL_20251206235012: epoch=82 step=82000 epoch=82 metrics={'time_sample_batch': 0.004852848529815674, 'time_algorithm_update': 0.017042294025421144, 'critic_loss': 4.3397490046024325, 'q_loss': 4.159083091020584, 'v_loss': 0.18066591326892376, 'actor_loss': -1.9599145864844323, 'time_step': 0.022147929906845092, 'td_error': 2.4971083061009898, 'value_scale': 4.431675276879068, 'discounted_advantage': -2.3295577966441257, 'initial_state': 5.318871021270752, 'diff_eval': 580.4798992399941} step=82000
2025-12-07 00:26.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_82000.d3


Epoch 83/200: 100%|██████████| 1000/1000 [00:23<00:00, 43.07it/s, critic_loss=4.45, q_loss=4.27, v_loss=0.183, actor_loss=-1.87]


2025-12-07 00:27.17 [info     ] IQL_20251206235012: epoch=83 step=83000 epoch=83 metrics={'time_sample_batch': 0.004934966802597046, 'time_algorithm_update': 0.017572948932647704, 'critic_loss': 4.444350732803345, 'q_loss': 4.261877558231354, 'v_loss': 0.18247317384928466, 'actor_loss': -1.8755571893751621, 'time_step': 0.022789443254470826, 'td_error': 2.510643451288649, 'value_scale': 4.701502607875384, 'discounted_advantage': -2.4421830880387665, 'initial_state': 5.798135280609131, 'diff_eval': 540.9362317462821} step=83000
2025-12-07 00:27.17 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_83000.d3


Epoch 84/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.18it/s, critic_loss=4.41, q_loss=4.23, v_loss=0.181, actor_loss=-2.09]


2025-12-07 00:27.43 [info     ] IQL_20251206235012: epoch=84 step=84000 epoch=84 metrics={'time_sample_batch': 0.004831377506256103, 'time_algorithm_update': 0.01711460614204407, 'critic_loss': 4.415280735969543, 'q_loss': 4.234445848286152, 'v_loss': 0.18083489261567592, 'actor_loss': -2.093313662737608, 'time_step': 0.022205002307891845, 'td_error': 2.5173087661846294, 'value_scale': 4.764551679325061, 'discounted_advantage': -3.0046980999965425, 'initial_state': 6.283106803894043, 'diff_eval': 584.0616069703709} step=84000
2025-12-07 00:27.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_84000.d3


Epoch 85/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.91it/s, critic_loss=4.35, q_loss=4.17, v_loss=0.183, actor_loss=-1.97]


2025-12-07 00:28.09 [info     ] IQL_20251206235012: epoch=85 step=85000 epoch=85 metrics={'time_sample_batch': 0.004699096918106079, 'time_algorithm_update': 0.016883551597595216, 'critic_loss': 4.357934318184853, 'q_loss': 4.17494484937191, 'v_loss': 0.1829894642457366, 'actor_loss': -1.97404619204998, 'time_step': 0.02183446168899536, 'td_error': 2.760605501023465, 'value_scale': 4.379440173915801, 'discounted_advantage': -2.5194075806672034, 'initial_state': 4.532960414886475, 'diff_eval': 535.6712917191466} step=85000
2025-12-07 00:28.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_85000.d3


Epoch 86/200: 100%|██████████| 1000/1000 [00:22<00:00, 43.92it/s, critic_loss=4.36, q_loss=4.18, v_loss=0.183, actor_loss=-2.06]


2025-12-07 00:28.35 [info     ] IQL_20251206235012: epoch=86 step=86000 epoch=86 metrics={'time_sample_batch': 0.0047915086746215824, 'time_algorithm_update': 0.017276426076889038, 'critic_loss': 4.365554248690605, 'q_loss': 4.1826528202295306, 'v_loss': 0.18290142665803433, 'actor_loss': -2.059459432810545, 'time_step': 0.022336005687713625, 'td_error': 2.3122833940548415, 'value_scale': 4.841357476123642, 'discounted_advantage': -2.3164198345052562, 'initial_state': 6.518145561218262, 'diff_eval': 570.8660527596597} step=86000
2025-12-07 00:28.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_86000.d3


Epoch 87/200: 100%|██████████| 1000/1000 [00:23<00:00, 43.20it/s, critic_loss=4.37, q_loss=4.19, v_loss=0.184, actor_loss=-2.07]


2025-12-07 00:29.02 [info     ] IQL_20251206235012: epoch=87 step=87000 epoch=87 metrics={'time_sample_batch': 0.004951223850250244, 'time_algorithm_update': 0.017512221097946166, 'critic_loss': 4.360434105157852, 'q_loss': 4.176835047125817, 'v_loss': 0.18359905972331764, 'actor_loss': -2.0705378711223603, 'time_step': 0.022734375715255738, 'td_error': 2.405668400803165, 'value_scale': 4.76289402288779, 'discounted_advantage': -2.454013270349763, 'initial_state': 7.03129768371582, 'diff_eval': 494.8526010186} step=87000
2025-12-07 00:29.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_87000.d3


Epoch 88/200: 100%|██████████| 1000/1000 [00:23<00:00, 43.46it/s, critic_loss=4.37, q_loss=4.18, v_loss=0.192, actor_loss=-2.07]


2025-12-07 00:29.29 [info     ] IQL_20251206235012: epoch=88 step=88000 epoch=88 metrics={'time_sample_batch': 0.004910139083862305, 'time_algorithm_update': 0.01736090111732483, 'critic_loss': 4.366557402253151, 'q_loss': 4.174933408379554, 'v_loss': 0.19162399291247129, 'actor_loss': -2.0662245233356953, 'time_step': 0.022553997039794924, 'td_error': 2.332583562853142, 'value_scale': 4.673185698441733, 'discounted_advantage': -2.2317725587351727, 'initial_state': 6.383502006530762, 'diff_eval': 621.4852804434859} step=88000
2025-12-07 00:29.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_88000.d3


Epoch 89/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.34it/s, critic_loss=4.38, q_loss=4.19, v_loss=0.188, actor_loss=-2.07]


2025-12-07 00:29.55 [info     ] IQL_20251206235012: epoch=89 step=89000 epoch=89 metrics={'time_sample_batch': 0.004778319358825683, 'time_algorithm_update': 0.017101848125457765, 'critic_loss': 4.376244228839874, 'q_loss': 4.18849613893032, 'v_loss': 0.18774808529764414, 'actor_loss': -2.0719064207226037, 'time_step': 0.022142750263214113, 'td_error': 2.481676064655025, 'value_scale': 4.616628406100968, 'discounted_advantage': -2.588970253497947, 'initial_state': 6.644418716430664, 'diff_eval': 522.7375781725314} step=89000
2025-12-07 00:29.55 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_89000.d3


Epoch 90/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.33it/s, critic_loss=4.34, q_loss=4.15, v_loss=0.183, actor_loss=-1.99]


2025-12-07 00:30.21 [info     ] IQL_20251206235012: epoch=90 step=90000 epoch=90 metrics={'time_sample_batch': 0.004786426067352295, 'time_algorithm_update': 0.01709856367111206, 'critic_loss': 4.334743495583535, 'q_loss': 4.1515796407461165, 'v_loss': 0.18316385792195797, 'actor_loss': -1.9860842681229114, 'time_step': 0.022140419960021973, 'td_error': 2.61908621131624, 'value_scale': 4.536343865700016, 'discounted_advantage': -2.638122847931769, 'initial_state': 5.048854351043701, 'diff_eval': 503.5769705503781} step=90000
2025-12-07 00:30.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_90000.d3


Epoch 91/200: 100%|██████████| 1000/1000 [00:22<00:00, 43.74it/s, critic_loss=4.34, q_loss=4.15, v_loss=0.185, actor_loss=-2.14]


2025-12-07 00:30.47 [info     ] IQL_20251206235012: epoch=91 step=91000 epoch=91 metrics={'time_sample_batch': 0.0051827914714813236, 'time_algorithm_update': 0.01704334330558777, 'critic_loss': 4.340607398033142, 'q_loss': 4.155741054058075, 'v_loss': 0.18486634413152933, 'actor_loss': -2.1353262479007245, 'time_step': 0.022473712682724, 'td_error': 2.715595997198141, 'value_scale': 4.745054305773868, 'discounted_advantage': -2.5670751166223034, 'initial_state': 5.487375259399414, 'diff_eval': 598.6907546596378} step=91000
2025-12-07 00:30.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_91000.d3


Epoch 92/200: 100%|██████████| 1000/1000 [00:23<00:00, 42.33it/s, critic_loss=4.34, q_loss=4.15, v_loss=0.188, actor_loss=-2.05]


2025-12-07 00:31.14 [info     ] IQL_20251206235012: epoch=92 step=92000 epoch=92 metrics={'time_sample_batch': 0.0050477683544158936, 'time_algorithm_update': 0.0178630850315094, 'critic_loss': 4.344901341557502, 'q_loss': 4.157358132123947, 'v_loss': 0.18754320731759072, 'actor_loss': -2.0508183296620848, 'time_step': 0.023204321384429932, 'td_error': 2.5944099514398875, 'value_scale': 4.657474919207463, 'discounted_advantage': -2.577055808855776, 'initial_state': 6.003786563873291, 'diff_eval': 536.0618688710325} step=92000
2025-12-07 00:31.15 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_92000.d3


Epoch 93/200: 100%|██████████| 1000/1000 [00:23<00:00, 42.18it/s, critic_loss=4.36, q_loss=4.17, v_loss=0.186, actor_loss=-2.11]


2025-12-07 00:31.42 [info     ] IQL_20251206235012: epoch=93 step=93000 epoch=93 metrics={'time_sample_batch': 0.004777365684509277, 'time_algorithm_update': 0.018256882667541503, 'critic_loss': 4.363425957322121, 'q_loss': 4.177249183654785, 'v_loss': 0.18617677243798972, 'actor_loss': -2.1079295082092284, 'time_step': 0.02329874277114868, 'td_error': 2.718124605316464, 'value_scale': 4.693179480730707, 'discounted_advantage': -3.4625854565452743, 'initial_state': 5.769916534423828, 'diff_eval': 591.6396896597763} step=93000
2025-12-07 00:31.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_93000.d3


Epoch 94/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.13it/s, critic_loss=4.36, q_loss=4.17, v_loss=0.185, actor_loss=-2.06]


2025-12-07 00:32.08 [info     ] IQL_20251206235012: epoch=94 step=94000 epoch=94 metrics={'time_sample_batch': 0.004831872701644898, 'time_algorithm_update': 0.0171692955493927, 'critic_loss': 4.363688334822655, 'q_loss': 4.178671417355537, 'v_loss': 0.18501691119372846, 'actor_loss': -2.067265939310193, 'time_step': 0.02225069308280945, 'td_error': 2.417290202305242, 'value_scale': 4.701673691940072, 'discounted_advantage': -2.6464528370122786, 'initial_state': 7.1651611328125, 'diff_eval': 505.06090499436516} step=94000
2025-12-07 00:32.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_94000.d3


Epoch 95/200: 100%|██████████| 1000/1000 [00:22<00:00, 45.45it/s, critic_loss=4.31, q_loss=4.12, v_loss=0.187, actor_loss=-2.08]


2025-12-07 00:32.33 [info     ] IQL_20251206235012: epoch=95 step=95000 epoch=95 metrics={'time_sample_batch': 0.00457445764541626, 'time_algorithm_update': 0.016787832736968995, 'critic_loss': 4.3065729545354845, 'q_loss': 4.119712402105331, 'v_loss': 0.1868605483621359, 'actor_loss': -2.0801345069110395, 'time_step': 0.021604088306427002, 'td_error': 2.4958741703161755, 'value_scale': 4.754999583387202, 'discounted_advantage': -2.6783408743197317, 'initial_state': 6.755366325378418, 'diff_eval': 498.09082784843633} step=95000
2025-12-07 00:32.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_95000.d3


Epoch 96/200: 100%|██████████| 1000/1000 [00:22<00:00, 45.15it/s, critic_loss=4.36, q_loss=4.18, v_loss=0.182, actor_loss=-2.14]


2025-12-07 00:32.59 [info     ] IQL_20251206235012: epoch=96 step=96000 epoch=96 metrics={'time_sample_batch': 0.0046510472297668455, 'time_algorithm_update': 0.01683737587928772, 'critic_loss': 4.359772528409958, 'q_loss': 4.177494294881821, 'v_loss': 0.18227822940796615, 'actor_loss': -2.1390119235515597, 'time_step': 0.02174431252479553, 'td_error': 2.419860394777837, 'value_scale': 4.606769060959597, 'discounted_advantage': -2.2420690656233235, 'initial_state': 6.441617965698242, 'diff_eval': 496.9142251703418} step=96000
2025-12-07 00:32.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_96000.d3


Epoch 97/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.88it/s, critic_loss=4.28, q_loss=4.1, v_loss=0.181, actor_loss=-2.03]


2025-12-07 00:33.25 [info     ] IQL_20251206235012: epoch=97 step=97000 epoch=97 metrics={'time_sample_batch': 0.0047382314205169675, 'time_algorithm_update': 0.016870342969894408, 'critic_loss': 4.284831159591675, 'q_loss': 4.103567433595657, 'v_loss': 0.18126372608542443, 'actor_loss': -2.0224159681648017, 'time_step': 0.021857097864151, 'td_error': 2.578909166271656, 'value_scale': 4.647503628274496, 'discounted_advantage': -1.5462097340800647, 'initial_state': 5.185359001159668, 'diff_eval': 537.4356627762751} step=97000
2025-12-07 00:33.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_97000.d3


Epoch 98/200: 100%|██████████| 1000/1000 [00:22<00:00, 43.97it/s, critic_loss=4.34, q_loss=4.15, v_loss=0.186, actor_loss=-2.14]


2025-12-07 00:33.51 [info     ] IQL_20251206235012: epoch=98 step=98000 epoch=98 metrics={'time_sample_batch': 0.004835913419723511, 'time_algorithm_update': 0.017225653409957884, 'critic_loss': 4.3322089800834656, 'q_loss': 4.146210093617439, 'v_loss': 0.1859988878145814, 'actor_loss': -2.136690182894468, 'time_step': 0.02232435750961304, 'td_error': 2.287150811994247, 'value_scale': 4.474371196963642, 'discounted_advantage': -1.464865777952405, 'initial_state': 6.743652820587158, 'diff_eval': 498.3722867926398} step=98000
2025-12-07 00:33.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_98000.d3


Epoch 99/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.23it/s, critic_loss=4.35, q_loss=4.17, v_loss=0.183, actor_loss=-2.19]


2025-12-07 00:34.17 [info     ] IQL_20251206235012: epoch=99 step=99000 epoch=99 metrics={'time_sample_batch': 0.004773011207580566, 'time_algorithm_update': 0.017159244298934936, 'critic_loss': 4.353601066470146, 'q_loss': 4.170257622659206, 'v_loss': 0.18334344147145748, 'actor_loss': -2.191753112643957, 'time_step': 0.022186900138854982, 'td_error': 2.398603081588634, 'value_scale': 4.584994307451406, 'discounted_advantage': -2.4027317080096453, 'initial_state': 5.9733195304870605, 'diff_eval': 507.68054283068307} step=99000
2025-12-07 00:34.18 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_99000.d3


Epoch 100/200: 100%|██████████| 1000/1000 [00:23<00:00, 43.24it/s, critic_loss=4.29, q_loss=4.11, v_loss=0.186, actor_loss=-2.13]


2025-12-07 00:34.44 [info     ] IQL_20251206235012: epoch=100 step=100000 epoch=100 metrics={'time_sample_batch': 0.004931094884872436, 'time_algorithm_update': 0.017514750480651854, 'critic_loss': 4.289989356160164, 'q_loss': 4.103454902887345, 'v_loss': 0.18653445701301097, 'actor_loss': -2.123798484325409, 'time_step': 0.022702975273132323, 'td_error': 2.5584391588199034, 'value_scale': 4.807203253012928, 'discounted_advantage': -2.956458305682949, 'initial_state': 7.161249160766602, 'diff_eval': 540.4193376347242} step=100000
2025-12-07 00:34.44 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_100000.d3


Epoch 101/200: 100%|██████████| 1000/1000 [00:22<00:00, 43.87it/s, critic_loss=4.34, q_loss=4.16, v_loss=0.187, actor_loss=-2.15]


2025-12-07 00:35.10 [info     ] IQL_20251206235012: epoch=101 step=101000 epoch=101 metrics={'time_sample_batch': 0.004781051397323609, 'time_algorithm_update': 0.017299864530563355, 'critic_loss': 4.349820942878723, 'q_loss': 4.1625846617221836, 'v_loss': 0.1872362887635827, 'actor_loss': -2.146526083096862, 'time_step': 0.02234871292114258, 'td_error': 2.5051959175799894, 'value_scale': 4.701860667908937, 'discounted_advantage': -2.2344326415785583, 'initial_state': 6.210594654083252, 'diff_eval': 500.23193295521713} step=101000
2025-12-07 00:35.11 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_101000.d3


Epoch 102/200: 100%|██████████| 1000/1000 [00:22<00:00, 43.81it/s, critic_loss=4.31, q_loss=4.12, v_loss=0.188, actor_loss=-2.25]


2025-12-07 00:35.37 [info     ] IQL_20251206235012: epoch=102 step=102000 epoch=102 metrics={'time_sample_batch': 0.004862167835235596, 'time_algorithm_update': 0.01730053210258484, 'critic_loss': 4.308743215560913, 'q_loss': 4.120848549246788, 'v_loss': 0.18789465627074242, 'actor_loss': -2.244387031108141, 'time_step': 0.022413212537765504, 'td_error': 2.5431449040337615, 'value_scale': 4.75207014137384, 'discounted_advantage': -2.883456602872389, 'initial_state': 6.527876377105713, 'diff_eval': 512.1618831633878} step=102000
2025-12-07 00:35.37 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_102000.d3


Epoch 103/200: 100%|██████████| 1000/1000 [00:23<00:00, 43.11it/s, critic_loss=4.36, q_loss=4.18, v_loss=0.186, actor_loss=-2.09]


2025-12-07 00:36.04 [info     ] IQL_20251206235012: epoch=103 step=103000 epoch=103 metrics={'time_sample_batch': 0.00493039345741272, 'time_algorithm_update': 0.017579775333404542, 'critic_loss': 4.367481543779373, 'q_loss': 4.181607757687568, 'v_loss': 0.18587378535419702, 'actor_loss': -2.0871472500562667, 'time_step': 0.022775098323822022, 'td_error': 2.5342027907627185, 'value_scale': 4.680090144516281, 'discounted_advantage': -2.200950916773319, 'initial_state': 5.353460788726807, 'diff_eval': 460.8685383249774} step=103000
2025-12-07 00:36.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_103000.d3


Epoch 104/200: 100%|██████████| 1000/1000 [00:23<00:00, 43.38it/s, critic_loss=4.34, q_loss=4.15, v_loss=0.187, actor_loss=-2.09]


2025-12-07 00:36.30 [info     ] IQL_20251206235012: epoch=104 step=104000 epoch=104 metrics={'time_sample_batch': 0.004954652070999145, 'time_algorithm_update': 0.017424502611160277, 'critic_loss': 4.343823330402374, 'q_loss': 4.157143844127655, 'v_loss': 0.18667947842925786, 'actor_loss': -2.093719893425703, 'time_step': 0.022645379304885863, 'td_error': 2.9086819997858497, 'value_scale': 5.016243946964151, 'discounted_advantage': -3.7026326528926066, 'initial_state': 7.015017509460449, 'diff_eval': 749.8690290095875} step=104000
2025-12-07 00:36.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_104000.d3


Epoch 105/200: 100%|██████████| 1000/1000 [00:23<00:00, 43.40it/s, critic_loss=4.28, q_loss=4.1, v_loss=0.18, actor_loss=-2.14] 


2025-12-07 00:36.57 [info     ] IQL_20251206235012: epoch=105 step=105000 epoch=105 metrics={'time_sample_batch': 0.004795676946640014, 'time_algorithm_update': 0.017545544385910033, 'critic_loss': 4.27416842675209, 'q_loss': 4.094679014205933, 'v_loss': 0.17948940946161748, 'actor_loss': -2.1356954908370973, 'time_step': 0.022611562728881835, 'td_error': 2.659704854029331, 'value_scale': 4.466587575235266, 'discounted_advantage': -2.428026463189237, 'initial_state': 5.321290493011475, 'diff_eval': 469.20219063401} step=105000
2025-12-07 00:36.57 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_105000.d3


Epoch 106/200: 100%|██████████| 1000/1000 [00:22<00:00, 43.70it/s, critic_loss=4.28, q_loss=4.1, v_loss=0.182, actor_loss=-2.11]


2025-12-07 00:37.23 [info     ] IQL_20251206235012: epoch=106 step=106000 epoch=106 metrics={'time_sample_batch': 0.00489938473701477, 'time_algorithm_update': 0.01732088541984558, 'critic_loss': 4.276754124879837, 'q_loss': 4.094323019981385, 'v_loss': 0.18243110882490873, 'actor_loss': -2.110562404870987, 'time_step': 0.022485579013824464, 'td_error': 2.34036496567514, 'value_scale': 4.714508207349383, 'discounted_advantage': -2.432222781525725, 'initial_state': 7.435302734375, 'diff_eval': 502.4015968299837} step=106000
2025-12-07 00:37.23 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_106000.d3


Epoch 107/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.75it/s, critic_loss=4.31, q_loss=4.14, v_loss=0.179, actor_loss=-2.14]


2025-12-07 00:37.49 [info     ] IQL_20251206235012: epoch=107 step=107000 epoch=107 metrics={'time_sample_batch': 0.004711044311523437, 'time_algorithm_update': 0.016974180698394774, 'critic_loss': 4.315265146970749, 'q_loss': 4.1369491579532625, 'v_loss': 0.17831598676741123, 'actor_loss': -2.1375035811960696, 'time_step': 0.02193162178993225, 'td_error': 2.7488034806088932, 'value_scale': 4.582859851179874, 'discounted_advantage': -1.6705166539986807, 'initial_state': 4.832176685333252, 'diff_eval': 548.7173480276022} step=107000
2025-12-07 00:37.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_107000.d3


Epoch 108/200: 100%|██████████| 1000/1000 [00:22<00:00, 43.57it/s, critic_loss=4.31, q_loss=4.12, v_loss=0.187, actor_loss=-2.22]


2025-12-07 00:38.16 [info     ] IQL_20251206235012: epoch=108 step=108000 epoch=108 metrics={'time_sample_batch': 0.004885681629180908, 'time_algorithm_update': 0.017390894412994386, 'critic_loss': 4.311866426944733, 'q_loss': 4.1248644933700565, 'v_loss': 0.18700193317979574, 'actor_loss': -2.2252561135590074, 'time_step': 0.02253720998764038, 'td_error': 2.450121082746301, 'value_scale': 4.621809463211072, 'discounted_advantage': -2.3268235349636575, 'initial_state': 6.9736809730529785, 'diff_eval': 510.12797009544886} step=108000
2025-12-07 00:38.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_108000.d3


Epoch 109/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.29it/s, critic_loss=4.25, q_loss=4.07, v_loss=0.184, actor_loss=-2.17]


2025-12-07 00:38.42 [info     ] IQL_20251206235012: epoch=109 step=109000 epoch=109 metrics={'time_sample_batch': 0.004831001996994018, 'time_algorithm_update': 0.01706063771247864, 'critic_loss': 4.245377143740654, 'q_loss': 4.061362887620926, 'v_loss': 0.1840142540410161, 'actor_loss': -2.1664858973771333, 'time_step': 0.02215467119216919, 'td_error': 2.3673583155501547, 'value_scale': 4.5306127209119555, 'discounted_advantage': -2.1290245703866093, 'initial_state': 6.089293003082275, 'diff_eval': 526.7937210069705} step=109000
2025-12-07 00:38.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_109000.d3


Epoch 110/200: 100%|██████████| 1000/1000 [00:22<00:00, 43.48it/s, critic_loss=4.24, q_loss=4.05, v_loss=0.188, actor_loss=-2.26]


2025-12-07 00:39.09 [info     ] IQL_20251206235012: epoch=110 step=110000 epoch=110 metrics={'time_sample_batch': 0.004821567058563232, 'time_algorithm_update': 0.017478636264801027, 'critic_loss': 4.242923124670982, 'q_loss': 4.054804512619972, 'v_loss': 0.18811860406398773, 'actor_loss': -2.2662852787673473, 'time_step': 0.022556437492370605, 'td_error': 2.3163558543169107, 'value_scale': 4.610619954918632, 'discounted_advantage': -2.7896052941493594, 'initial_state': 6.387462139129639, 'diff_eval': 472.7941951896062} step=110000
2025-12-07 00:39.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_110000.d3


Epoch 111/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.72it/s, critic_loss=4.25, q_loss=4.06, v_loss=0.187, actor_loss=-2.17]


2025-12-07 00:39.34 [info     ] IQL_20251206235012: epoch=111 step=111000 epoch=111 metrics={'time_sample_batch': 0.004730918169021606, 'time_algorithm_update': 0.016982634782791137, 'critic_loss': 4.247158697724342, 'q_loss': 4.060085500061512, 'v_loss': 0.18707319992780685, 'actor_loss': -2.1716192599236965, 'time_step': 0.021965791702270508, 'td_error': 2.423926602331179, 'value_scale': 4.651984455162692, 'discounted_advantage': -2.404063043597241, 'initial_state': 6.9578962326049805, 'diff_eval': 443.24674084735983} step=111000
2025-12-07 00:39.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_111000.d3


Epoch 112/200: 100%|██████████| 1000/1000 [00:24<00:00, 40.59it/s, critic_loss=4.28, q_loss=4.09, v_loss=0.186, actor_loss=-2.31]


2025-12-07 00:40.03 [info     ] IQL_20251206235012: epoch=112 step=112000 epoch=112 metrics={'time_sample_batch': 0.006018489599227905, 'time_algorithm_update': 0.017889240980148314, 'critic_loss': 4.27389274930954, 'q_loss': 4.088221365571022, 'v_loss': 0.18567138082534074, 'actor_loss': -2.3017817779779435, 'time_step': 0.024195422649383545, 'td_error': 2.478533161282566, 'value_scale': 4.737540318232501, 'discounted_advantage': -2.731458988786895, 'initial_state': 7.359813213348389, 'diff_eval': 509.5978226566626} step=112000
2025-12-07 00:40.03 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_112000.d3


Epoch 113/200: 100%|██████████| 1000/1000 [00:23<00:00, 43.18it/s, critic_loss=4.29, q_loss=4.11, v_loss=0.185, actor_loss=-2.34]


2025-12-07 00:40.30 [info     ] IQL_20251206235012: epoch=113 step=113000 epoch=113 metrics={'time_sample_batch': 0.00481511926651001, 'time_algorithm_update': 0.017592153549194335, 'critic_loss': 4.303350844502449, 'q_loss': 4.1179971067905425, 'v_loss': 0.18535373666137456, 'actor_loss': -2.329458577901125, 'time_step': 0.022692230224609374, 'td_error': 2.9689804629876106, 'value_scale': 4.825665088941882, 'discounted_advantage': -3.419011900496146, 'initial_state': 5.042099952697754, 'diff_eval': 727.1350631748254} step=113000
2025-12-07 00:40.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_113000.d3


Epoch 114/200: 100%|██████████| 1000/1000 [00:24<00:00, 40.76it/s, critic_loss=4.25, q_loss=4.07, v_loss=0.179, actor_loss=-2.25]


2025-12-07 00:40.58 [info     ] IQL_20251206235012: epoch=114 step=114000 epoch=114 metrics={'time_sample_batch': 0.005267812252044678, 'time_algorithm_update': 0.01847671937942505, 'critic_loss': 4.249003729104996, 'q_loss': 4.069839458942413, 'v_loss': 0.17916427253186704, 'actor_loss': -2.2437584560513497, 'time_step': 0.024059306859970094, 'td_error': 2.2135874113847036, 'value_scale': 4.4724157065096914, 'discounted_advantage': -1.8008059906155132, 'initial_state': 6.383526802062988, 'diff_eval': 476.9911258318961} step=114000
2025-12-07 00:40.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_114000.d3


Epoch 115/200: 100%|██████████| 1000/1000 [00:23<00:00, 42.46it/s, critic_loss=4.16, q_loss=3.97, v_loss=0.185, actor_loss=-2.2]


2025-12-07 00:41.26 [info     ] IQL_20251206235012: epoch=115 step=115000 epoch=115 metrics={'time_sample_batch': 0.005076208591461182, 'time_algorithm_update': 0.01774988865852356, 'critic_loss': 4.165605692863465, 'q_loss': 3.9802721351385117, 'v_loss': 0.18533356310427188, 'actor_loss': -2.195006074264646, 'time_step': 0.023108123064041138, 'td_error': 2.73980910457349, 'value_scale': 4.3874348528431035, 'discounted_advantage': -2.618926986097308, 'initial_state': 4.521032810211182, 'diff_eval': 450.99644855974384} step=115000
2025-12-07 00:41.26 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_115000.d3


Epoch 116/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.07it/s, critic_loss=4.13, q_loss=3.95, v_loss=0.183, actor_loss=-2.28]


2025-12-07 00:41.52 [info     ] IQL_20251206235012: epoch=116 step=116000 epoch=116 metrics={'time_sample_batch': 0.004893259763717651, 'time_algorithm_update': 0.017062907695770263, 'critic_loss': 4.133493927240372, 'q_loss': 3.950579005718231, 'v_loss': 0.1829149235263467, 'actor_loss': -2.275453744888306, 'time_step': 0.022232054471969603, 'td_error': 2.5763622472281753, 'value_scale': 4.475160210305545, 'discounted_advantage': -2.3108177754676573, 'initial_state': 5.694450855255127, 'diff_eval': 487.8443587603446} step=116000
2025-12-07 00:41.52 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_116000.d3


Epoch 117/200: 100%|██████████| 1000/1000 [00:23<00:00, 43.34it/s, critic_loss=4.2, q_loss=4.01, v_loss=0.188, actor_loss=-2.31]


2025-12-07 00:42.19 [info     ] IQL_20251206235012: epoch=117 step=117000 epoch=117 metrics={'time_sample_batch': 0.004908914089202881, 'time_algorithm_update': 0.017471362829208373, 'critic_loss': 4.2048273131847385, 'q_loss': 4.016500543951988, 'v_loss': 0.18832677322626115, 'actor_loss': -2.3060435069650413, 'time_step': 0.022649652481079103, 'td_error': 2.470283772202497, 'value_scale': 4.612844079272308, 'discounted_advantage': -2.5969710726036475, 'initial_state': 6.415903091430664, 'diff_eval': 526.5138143953559} step=117000
2025-12-07 00:42.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_117000.d3


Epoch 118/200: 100%|██████████| 1000/1000 [00:23<00:00, 43.43it/s, critic_loss=4.19, q_loss=4, v_loss=0.188, actor_loss=-2.2]   


2025-12-07 00:42.46 [info     ] IQL_20251206235012: epoch=118 step=118000 epoch=118 metrics={'time_sample_batch': 0.0048882818222045895, 'time_algorithm_update': 0.017463890075683594, 'critic_loss': 4.19512449002266, 'q_loss': 4.006804867506027, 'v_loss': 0.18831961785256862, 'actor_loss': -2.2074055933058263, 'time_step': 0.02261300253868103, 'td_error': 2.5392655190621025, 'value_scale': 4.243547138661202, 'discounted_advantage': -2.5257169896756295, 'initial_state': 4.63973331451416, 'diff_eval': 472.78755162941695} step=118000
2025-12-07 00:42.46 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_118000.d3


Epoch 119/200: 100%|██████████| 1000/1000 [00:23<00:00, 43.18it/s, critic_loss=4.19, q_loss=4.01, v_loss=0.181, actor_loss=-2.33]


2025-12-07 00:43.12 [info     ] IQL_20251206235012: epoch=119 step=119000 epoch=119 metrics={'time_sample_batch': 0.00481699824333191, 'time_algorithm_update': 0.017622929096221925, 'critic_loss': 4.194677097678184, 'q_loss': 4.013133168816567, 'v_loss': 0.18154392871260644, 'actor_loss': -2.3260285596847536, 'time_step': 0.02271106481552124, 'td_error': 2.355234054876996, 'value_scale': 4.674360680044922, 'discounted_advantage': -2.2157107110602396, 'initial_state': 6.287215709686279, 'diff_eval': 481.34567183183975} step=119000
2025-12-07 00:43.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_119000.d3


Epoch 120/200: 100%|██████████| 1000/1000 [00:22<00:00, 43.86it/s, critic_loss=4.2, q_loss=4.02, v_loss=0.182, actor_loss=-2.23]


2025-12-07 00:43.39 [info     ] IQL_20251206235012: epoch=120 step=120000 epoch=120 metrics={'time_sample_batch': 0.004869659662246704, 'time_algorithm_update': 0.017274978160858153, 'critic_loss': 4.19426076066494, 'q_loss': 4.012034457683563, 'v_loss': 0.18222630167752504, 'actor_loss': -2.226638831824064, 'time_step': 0.022400859355926513, 'td_error': 2.4314358673342196, 'value_scale': 4.65537272208688, 'discounted_advantage': -2.2199718552972953, 'initial_state': 6.151205062866211, 'diff_eval': 466.71561602187654} step=120000
2025-12-07 00:43.39 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_120000.d3


Epoch 121/200: 100%|██████████| 1000/1000 [00:22<00:00, 43.97it/s, critic_loss=4.18, q_loss=4, v_loss=0.182, actor_loss=-2.24]  


2025-12-07 00:44.05 [info     ] IQL_20251206235012: epoch=121 step=121000 epoch=121 metrics={'time_sample_batch': 0.004829224109649658, 'time_algorithm_update': 0.01722731876373291, 'critic_loss': 4.178537005066872, 'q_loss': 3.996345971941948, 'v_loss': 0.18219103930145503, 'actor_loss': -2.2462259612679483, 'time_step': 0.022322834253311157, 'td_error': 2.4241698940141414, 'value_scale': 4.3473471866644235, 'discounted_advantage': -2.101006377676221, 'initial_state': 5.8270392417907715, 'diff_eval': 413.84052867056164} step=121000
2025-12-07 00:44.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_121000.d3


Epoch 122/200: 100%|██████████| 1000/1000 [00:22<00:00, 43.70it/s, critic_loss=4.21, q_loss=4.02, v_loss=0.188, actor_loss=-2.2]


2025-12-07 00:44.31 [info     ] IQL_20251206235012: epoch=122 step=122000 epoch=122 metrics={'time_sample_batch': 0.004864554405212402, 'time_algorithm_update': 0.017351431369781494, 'critic_loss': 4.210611071348191, 'q_loss': 4.022458745002747, 'v_loss': 0.18815232295542955, 'actor_loss': -2.2026656202077866, 'time_step': 0.02247528052330017, 'td_error': 2.3469724908547134, 'value_scale': 4.453206209024791, 'discounted_advantage': -1.9929105321797569, 'initial_state': 5.596230506896973, 'diff_eval': 470.95679754000645} step=122000
2025-12-07 00:44.31 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_122000.d3


Epoch 123/200: 100%|██████████| 1000/1000 [00:22<00:00, 43.60it/s, critic_loss=4.21, q_loss=4.02, v_loss=0.187, actor_loss=-2.33]


2025-12-07 00:44.58 [info     ] IQL_20251206235012: epoch=123 step=123000 epoch=123 metrics={'time_sample_batch': 0.004876081943511963, 'time_algorithm_update': 0.017380788803100587, 'critic_loss': 4.203430795431137, 'q_loss': 4.0172678011655805, 'v_loss': 0.18616299546509982, 'actor_loss': -2.330157219439745, 'time_step': 0.02251892113685608, 'td_error': 2.490145003562579, 'value_scale': 4.470309290147331, 'discounted_advantage': -2.638329412938265, 'initial_state': 6.634868144989014, 'diff_eval': 575.6563257524577} step=123000
2025-12-07 00:44.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_123000.d3


Epoch 124/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.03it/s, critic_loss=4.16, q_loss=3.97, v_loss=0.186, actor_loss=-2.17]


2025-12-07 00:45.24 [info     ] IQL_20251206235012: epoch=124 step=124000 epoch=124 metrics={'time_sample_batch': 0.004807495832443237, 'time_algorithm_update': 0.01723495411872864, 'critic_loss': 4.1554530169963835, 'q_loss': 3.9694549721479415, 'v_loss': 0.1859980431124568, 'actor_loss': -2.173769071817398, 'time_step': 0.022297621488571166, 'td_error': 2.3929644664072796, 'value_scale': 4.513582165698831, 'discounted_advantage': -2.7719986789574715, 'initial_state': 6.309927940368652, 'diff_eval': 451.35279307178484} step=124000
2025-12-07 00:45.24 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_124000.d3


Epoch 125/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.42it/s, critic_loss=4.2, q_loss=4.01, v_loss=0.187, actor_loss=-2.36]


2025-12-07 00:45.50 [info     ] IQL_20251206235012: epoch=125 step=125000 epoch=125 metrics={'time_sample_batch': 0.004743612766265869, 'time_algorithm_update': 0.017110944986343385, 'critic_loss': 4.1994638377428055, 'q_loss': 4.012185816526413, 'v_loss': 0.1872780163884163, 'actor_loss': -2.3632870852053167, 'time_step': 0.022090052366256715, 'td_error': 2.3784317536794086, 'value_scale': 4.259082720679764, 'discounted_advantage': -1.4387562229485433, 'initial_state': 6.005155086517334, 'diff_eval': 442.0348193041904} step=125000
2025-12-07 00:45.50 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_125000.d3


Epoch 126/200: 100%|██████████| 1000/1000 [00:23<00:00, 43.42it/s, critic_loss=4.16, q_loss=3.97, v_loss=0.19, actor_loss=-2.33]


2025-12-07 00:46.17 [info     ] IQL_20251206235012: epoch=126 step=126000 epoch=126 metrics={'time_sample_batch': 0.0049469366073608394, 'time_algorithm_update': 0.017443745851516725, 'critic_loss': 4.158535369753838, 'q_loss': 3.969212319135666, 'v_loss': 0.18932305724173784, 'actor_loss': -2.3319390822798014, 'time_step': 0.022662700414657594, 'td_error': 2.514215498988403, 'value_scale': 4.629482897808845, 'discounted_advantage': -2.16315542071752, 'initial_state': 6.391484260559082, 'diff_eval': 510.05160463488744} step=126000
2025-12-07 00:46.17 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_126000.d3


Epoch 127/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.14it/s, critic_loss=4.18, q_loss=3.99, v_loss=0.184, actor_loss=-2.33]


2025-12-07 00:46.43 [info     ] IQL_20251206235012: epoch=127 step=127000 epoch=127 metrics={'time_sample_batch': 0.004826815843582154, 'time_algorithm_update': 0.017157571077346803, 'critic_loss': 4.18187436068058, 'q_loss': 3.9975585564374922, 'v_loss': 0.184315802924335, 'actor_loss': -2.3252679177820683, 'time_step': 0.02224701690673828, 'td_error': 2.302462088550119, 'value_scale': 4.520625215305963, 'discounted_advantage': -2.045904786950612, 'initial_state': 6.823665618896484, 'diff_eval': 443.80199976107355} step=127000
2025-12-07 00:46.43 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_127000.d3


Epoch 128/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.17it/s, critic_loss=4.14, q_loss=3.96, v_loss=0.183, actor_loss=-2.38]


2025-12-07 00:47.09 [info     ] IQL_20251206235012: epoch=128 step=128000 epoch=128 metrics={'time_sample_batch': 0.004787287712097168, 'time_algorithm_update': 0.017194464683532716, 'critic_loss': 4.1431399451494215, 'q_loss': 3.9600449023246767, 'v_loss': 0.18309503850340844, 'actor_loss': -2.379776861548424, 'time_step': 0.02223767352104187, 'td_error': 2.5147764569741615, 'value_scale': 4.732593152497576, 'discounted_advantage': -3.1581225892179483, 'initial_state': 6.133254528045654, 'diff_eval': 480.0048573641259} step=128000
2025-12-07 00:47.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_128000.d3


Epoch 129/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.17it/s, critic_loss=4.21, q_loss=4.02, v_loss=0.187, actor_loss=-2.32]


2025-12-07 00:47.35 [info     ] IQL_20251206235012: epoch=129 step=129000 epoch=129 metrics={'time_sample_batch': 0.004752097845077514, 'time_algorithm_update': 0.01720813512802124, 'critic_loss': 4.205033850073814, 'q_loss': 4.018164973974228, 'v_loss': 0.18686887735128402, 'actor_loss': -2.325144487261772, 'time_step': 0.02221831727027893, 'td_error': 2.242747184141613, 'value_scale': 4.465465940325072, 'discounted_advantage': -2.3882844235713847, 'initial_state': 6.7126383781433105, 'diff_eval': 412.7786550921311} step=129000
2025-12-07 00:47.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_129000.d3


Epoch 130/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.72it/s, critic_loss=4.12, q_loss=3.93, v_loss=0.192, actor_loss=-2.38]


2025-12-07 00:48.01 [info     ] IQL_20251206235012: epoch=130 step=130000 epoch=130 metrics={'time_sample_batch': 0.004769613742828369, 'time_algorithm_update': 0.01695643973350525, 'critic_loss': 4.125687744021416, 'q_loss': 3.9336636579036712, 'v_loss': 0.1920240822657943, 'actor_loss': -2.3771872452795506, 'time_step': 0.02197547793388367, 'td_error': 2.3222700603502955, 'value_scale': 4.454008409655079, 'discounted_advantage': -1.5371937761946497, 'initial_state': 5.221896171569824, 'diff_eval': 389.2220053045593} step=130000
2025-12-07 00:48.01 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_130000.d3


Epoch 131/200: 100%|██████████| 1000/1000 [00:23<00:00, 41.81it/s, critic_loss=4.11, q_loss=3.93, v_loss=0.187, actor_loss=-2.32]


2025-12-07 00:48.28 [info     ] IQL_20251206235012: epoch=131 step=131000 epoch=131 metrics={'time_sample_batch': 0.005014204263687134, 'time_algorithm_update': 0.018198018550872802, 'critic_loss': 4.116591509103775, 'q_loss': 3.9301499163508415, 'v_loss': 0.1864415968582034, 'actor_loss': -2.3169245678186416, 'time_step': 0.023485590696334838, 'td_error': 2.976972011291719, 'value_scale': 4.606841391981373, 'discounted_advantage': -2.8839916332757918, 'initial_state': 4.8479132652282715, 'diff_eval': 557.3400051763977} step=131000
2025-12-07 00:48.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_131000.d3


Epoch 132/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.13it/s, critic_loss=4.19, q_loss=4.01, v_loss=0.187, actor_loss=-2.32]


2025-12-07 00:48.55 [info     ] IQL_20251206235012: epoch=132 step=132000 epoch=132 metrics={'time_sample_batch': 0.0047289998531341556, 'time_algorithm_update': 0.017201327323913575, 'critic_loss': 4.1896585828065875, 'q_loss': 4.002553077697754, 'v_loss': 0.18710550309345125, 'actor_loss': -2.3264960012733935, 'time_step': 0.022198691368103027, 'td_error': 2.3902645904374418, 'value_scale': 4.611181885888136, 'discounted_advantage': -2.2557671390456595, 'initial_state': 6.773038387298584, 'diff_eval': 439.0441408907991} step=132000
2025-12-07 00:48.55 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_132000.d3


Epoch 133/200: 100%|██████████| 1000/1000 [00:23<00:00, 41.83it/s, critic_loss=4.11, q_loss=3.93, v_loss=0.189, actor_loss=-2.33]


2025-12-07 00:49.22 [info     ] IQL_20251206235012: epoch=133 step=133000 epoch=133 metrics={'time_sample_batch': 0.0058189029693603515, 'time_algorithm_update': 0.017390573740005494, 'critic_loss': 4.116016768455506, 'q_loss': 3.927195403456688, 'v_loss': 0.188821365095675, 'actor_loss': -2.3180144203305244, 'time_step': 0.0234735963344574, 'td_error': 2.577831171852439, 'value_scale': 4.435856253747131, 'discounted_advantage': -2.4925433509269936, 'initial_state': 5.197575569152832, 'diff_eval': 501.28138886386046} step=133000
2025-12-07 00:49.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_133000.d3


Epoch 134/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.23it/s, critic_loss=4.13, q_loss=3.94, v_loss=0.189, actor_loss=-2.41]


2025-12-07 00:49.48 [info     ] IQL_20251206235012: epoch=134 step=134000 epoch=134 metrics={'time_sample_batch': 0.004741329908370972, 'time_algorithm_update': 0.017211886405944826, 'critic_loss': 4.130079118609428, 'q_loss': 3.940829213142395, 'v_loss': 0.18924990779161452, 'actor_loss': -2.4120954177975653, 'time_step': 0.02219824457168579, 'td_error': 2.5085284343443064, 'value_scale': 4.3413927564670685, 'discounted_advantage': -2.263293196792986, 'initial_state': 6.253119468688965, 'diff_eval': 601.160627553089} step=134000
2025-12-07 00:49.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_134000.d3


Epoch 135/200: 100%|██████████| 1000/1000 [00:23<00:00, 42.85it/s, critic_loss=4.12, q_loss=3.93, v_loss=0.19, actor_loss=-2.42]


2025-12-07 00:50.15 [info     ] IQL_20251206235012: epoch=135 step=135000 epoch=135 metrics={'time_sample_batch': 0.005013547658920288, 'time_algorithm_update': 0.017636104106903076, 'critic_loss': 4.117343638300896, 'q_loss': 3.927940099000931, 'v_loss': 0.18940353445708752, 'actor_loss': -2.4213993868529795, 'time_step': 0.02292341709136963, 'td_error': 2.4148079183392515, 'value_scale': 4.747676490450064, 'discounted_advantage': -2.831211401619038, 'initial_state': 6.630931854248047, 'diff_eval': 421.8153832681386} step=135000
2025-12-07 00:50.15 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_135000.d3


Epoch 136/200: 100%|██████████| 1000/1000 [00:23<00:00, 42.18it/s, critic_loss=4.17, q_loss=3.98, v_loss=0.186, actor_loss=-2.43]


2025-12-07 00:50.42 [info     ] IQL_20251206235012: epoch=136 step=136000 epoch=136 metrics={'time_sample_batch': 0.005324029684066772, 'time_algorithm_update': 0.017682459354400635, 'critic_loss': 4.173372392058372, 'q_loss': 3.987129302740097, 'v_loss': 0.18624308866262435, 'actor_loss': -2.428055888980627, 'time_step': 0.02328262996673584, 'td_error': 2.4543093244642415, 'value_scale': 4.4566444871706725, 'discounted_advantage': -1.5473701967969027, 'initial_state': 5.576317310333252, 'diff_eval': 577.8188544208715} step=136000
2025-12-07 00:50.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_136000.d3


Epoch 137/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.22it/s, critic_loss=4.15, q_loss=3.96, v_loss=0.188, actor_loss=-2.39]


2025-12-07 00:51.09 [info     ] IQL_20251206235012: epoch=137 step=137000 epoch=137 metrics={'time_sample_batch': 0.004753986120223999, 'time_algorithm_update': 0.017183180570602415, 'critic_loss': 4.1471016166210175, 'q_loss': 3.9590154234170916, 'v_loss': 0.18808619340509175, 'actor_loss': -2.394602628797293, 'time_step': 0.02220012354850769, 'td_error': 2.3815853728378835, 'value_scale': 4.370206611643156, 'discounted_advantage': -1.9091719667589573, 'initial_state': 6.170148849487305, 'diff_eval': 416.1110415249667} step=137000
2025-12-07 00:51.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_137000.d3


Epoch 138/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.02it/s, critic_loss=4.07, q_loss=3.88, v_loss=0.186, actor_loss=-2.34]


2025-12-07 00:51.35 [info     ] IQL_20251206235012: epoch=138 step=138000 epoch=138 metrics={'time_sample_batch': 0.004712169885635376, 'time_algorithm_update': 0.017258724212646483, 'critic_loss': 4.065406093597412, 'q_loss': 3.87948158121109, 'v_loss': 0.1859245131239295, 'actor_loss': -2.344708350092173, 'time_step': 0.02225030851364136, 'td_error': 2.305147900129308, 'value_scale': 4.40749612929415, 'discounted_advantage': -2.3740880295910807, 'initial_state': 6.323453426361084, 'diff_eval': 403.88943281569317} step=138000
2025-12-07 00:51.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_138000.d3


Epoch 139/200: 100%|██████████| 1000/1000 [00:23<00:00, 42.11it/s, critic_loss=4.16, q_loss=3.97, v_loss=0.193, actor_loss=-2.51]


2025-12-07 00:52.02 [info     ] IQL_20251206235012: epoch=139 step=139000 epoch=139 metrics={'time_sample_batch': 0.005025347948074341, 'time_algorithm_update': 0.018034909248352052, 'critic_loss': 4.166574949026108, 'q_loss': 3.9742406734228135, 'v_loss': 0.19233427917957305, 'actor_loss': -2.5160029792040586, 'time_step': 0.02333027958869934, 'td_error': 2.456259006026919, 'value_scale': 4.611046458065802, 'discounted_advantage': -2.4313804287156326, 'initial_state': 7.103155612945557, 'diff_eval': 391.94719046459664} step=139000
2025-12-07 00:52.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_139000.d3


Epoch 140/200: 100%|██████████| 1000/1000 [00:23<00:00, 43.18it/s, critic_loss=4.19, q_loss=4, v_loss=0.193, actor_loss=-2.38]  


2025-12-07 00:52.29 [info     ] IQL_20251206235012: epoch=140 step=140000 epoch=140 metrics={'time_sample_batch': 0.004864224195480347, 'time_algorithm_update': 0.017606781244277954, 'critic_loss': 4.189066962957382, 'q_loss': 3.9955936670303345, 'v_loss': 0.19347329737246036, 'actor_loss': -2.380161064386368, 'time_step': 0.022732585191726686, 'td_error': 2.7514214618127286, 'value_scale': 4.576537239705646, 'discounted_advantage': -3.803133343615456, 'initial_state': 5.684650897979736, 'diff_eval': 604.3021942357152} step=140000
2025-12-07 00:52.29 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_140000.d3


Epoch 141/200: 100%|██████████| 1000/1000 [00:23<00:00, 42.75it/s, critic_loss=4.07, q_loss=3.88, v_loss=0.193, actor_loss=-2.5]


2025-12-07 00:52.56 [info     ] IQL_20251206235012: epoch=141 step=141000 epoch=141 metrics={'time_sample_batch': 0.004930227279663086, 'time_algorithm_update': 0.01778097367286682, 'critic_loss': 4.072249145030975, 'q_loss': 3.8791111222505568, 'v_loss': 0.19313802529126406, 'actor_loss': -2.4897973307967187, 'time_step': 0.023001720905303957, 'td_error': 2.294327273926979, 'value_scale': 4.5625617134932055, 'discounted_advantage': -1.7070637140091065, 'initial_state': 7.1984992027282715, 'diff_eval': 492.06656479205327} step=141000
2025-12-07 00:52.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_141000.d3


Epoch 142/200: 100%|██████████| 1000/1000 [00:22<00:00, 43.66it/s, critic_loss=4.02, q_loss=3.83, v_loss=0.186, actor_loss=-2.44]


2025-12-07 00:53.22 [info     ] IQL_20251206235012: epoch=142 step=142000 epoch=142 metrics={'time_sample_batch': 0.004903147459030152, 'time_algorithm_update': 0.017321259021759034, 'critic_loss': 4.011837499260903, 'q_loss': 3.8262689528465272, 'v_loss': 0.18556854532659053, 'actor_loss': -2.4347098390311004, 'time_step': 0.022495553255081176, 'td_error': 2.447524383475019, 'value_scale': 4.571410969433695, 'discounted_advantage': -2.6708602983560774, 'initial_state': 6.039306640625, 'diff_eval': 480.14783310682543} step=142000
2025-12-07 00:53.22 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_142000.d3


Epoch 143/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.12it/s, critic_loss=4.05, q_loss=3.86, v_loss=0.19, actor_loss=-2.44]


2025-12-07 00:53.48 [info     ] IQL_20251206235012: epoch=143 step=143000 epoch=143 metrics={'time_sample_batch': 0.00482197642326355, 'time_algorithm_update': 0.01716150522232056, 'critic_loss': 4.053457081913948, 'q_loss': 3.863904734969139, 'v_loss': 0.18955234195291995, 'actor_loss': -2.435790590673685, 'time_step': 0.022250047206878662, 'td_error': 2.3689401786650226, 'value_scale': 4.355566463981226, 'discounted_advantage': -2.8014984036187287, 'initial_state': 5.89272928237915, 'diff_eval': 433.0288254554268} step=143000
2025-12-07 00:53.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_143000.d3


Epoch 144/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.24it/s, critic_loss=4.07, q_loss=3.88, v_loss=0.188, actor_loss=-2.52]


2025-12-07 00:54.15 [info     ] IQL_20251206235012: epoch=144 step=144000 epoch=144 metrics={'time_sample_batch': 0.00480746078491211, 'time_algorithm_update': 0.017137804269790648, 'critic_loss': 4.066966893792152, 'q_loss': 3.8793126541376113, 'v_loss': 0.1876542436555028, 'actor_loss': -2.518141858384013, 'time_step': 0.022195876836776735, 'td_error': 2.292647702111447, 'value_scale': 4.768156255940436, 'discounted_advantage': -1.7694303205545796, 'initial_state': 7.1523308753967285, 'diff_eval': 459.3786579081971} step=144000
2025-12-07 00:54.15 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_144000.d3


Epoch 145/200: 100%|██████████| 1000/1000 [00:23<00:00, 42.47it/s, critic_loss=4.06, q_loss=3.87, v_loss=0.187, actor_loss=-2.44]


2025-12-07 00:54.42 [info     ] IQL_20251206235012: epoch=145 step=145000 epoch=145 metrics={'time_sample_batch': 0.005097702026367188, 'time_algorithm_update': 0.01775752282142639, 'critic_loss': 4.056207000732422, 'q_loss': 3.869484404325485, 'v_loss': 0.1867225936949253, 'actor_loss': -2.440950270280242, 'time_step': 0.023124738931655883, 'td_error': 2.4524319740882947, 'value_scale': 4.461983573945412, 'discounted_advantage': -2.1545870886165996, 'initial_state': 5.819893836975098, 'diff_eval': 416.1956516958784} step=145000
2025-12-07 00:54.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_145000.d3


Epoch 146/200: 100%|██████████| 1000/1000 [00:22<00:00, 43.50it/s, critic_loss=4.14, q_loss=3.95, v_loss=0.192, actor_loss=-2.45]


2025-12-07 00:55.08 [info     ] IQL_20251206235012: epoch=146 step=146000 epoch=146 metrics={'time_sample_batch': 0.004892322540283203, 'time_algorithm_update': 0.01742004871368408, 'critic_loss': 4.135251165032387, 'q_loss': 3.9433552364110946, 'v_loss': 0.19189592063426972, 'actor_loss': -2.44645176461339, 'time_step': 0.0225854914188385, 'td_error': 2.3869956755979627, 'value_scale': 4.231661418337322, 'discounted_advantage': -1.688780813467167, 'initial_state': 4.9218902587890625, 'diff_eval': 428.94951333267824} step=146000
2025-12-07 00:55.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_146000.d3


Epoch 147/200: 100%|██████████| 1000/1000 [00:22<00:00, 43.96it/s, critic_loss=3.97, q_loss=3.79, v_loss=0.188, actor_loss=-2.38]


2025-12-07 00:55.35 [info     ] IQL_20251206235012: epoch=147 step=147000 epoch=147 metrics={'time_sample_batch': 0.004800595760345459, 'time_algorithm_update': 0.017250524044036864, 'critic_loss': 3.9739810940027236, 'q_loss': 3.7853653757572174, 'v_loss': 0.18861571730673313, 'actor_loss': -2.381208317756653, 'time_step': 0.02232920479774475, 'td_error': 2.3165902629940924, 'value_scale': 4.505163431817108, 'discounted_advantage': -1.9635099934747182, 'initial_state': 7.321080207824707, 'diff_eval': 407.9218979023438} step=147000
2025-12-07 00:55.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_147000.d3


Epoch 148/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.55it/s, critic_loss=4.1, q_loss=3.91, v_loss=0.191, actor_loss=-2.52]


2025-12-07 00:56.00 [info     ] IQL_20251206235012: epoch=148 step=148000 epoch=148 metrics={'time_sample_batch': 0.004718703269958496, 'time_algorithm_update': 0.017070910453796388, 'critic_loss': 4.099271314024925, 'q_loss': 3.9081680282354356, 'v_loss': 0.19110328617691993, 'actor_loss': -2.51464874073863, 'time_step': 0.022036811828613282, 'td_error': 2.4762860475649493, 'value_scale': 4.612108516289048, 'discounted_advantage': -2.4840573139911495, 'initial_state': 6.854346752166748, 'diff_eval': 456.6225161582451} step=148000
2025-12-07 00:56.01 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_148000.d3


Epoch 149/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.55it/s, critic_loss=4.04, q_loss=3.84, v_loss=0.195, actor_loss=-2.56]


2025-12-07 00:56.26 [info     ] IQL_20251206235012: epoch=149 step=149000 epoch=149 metrics={'time_sample_batch': 0.004749236583709716, 'time_algorithm_update': 0.01703994369506836, 'critic_loss': 4.0384250549077985, 'q_loss': 3.8432224518060685, 'v_loss': 0.19520260301977396, 'actor_loss': -2.5571123505234716, 'time_step': 0.02204333233833313, 'td_error': 2.3736478197123843, 'value_scale': 4.34171691938707, 'discounted_advantage': -1.7963765967963952, 'initial_state': 4.919194221496582, 'diff_eval': 349.3129288764017} step=149000
2025-12-07 00:56.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_149000.d3


Epoch 150/200: 100%|██████████| 1000/1000 [00:22<00:00, 43.63it/s, critic_loss=3.97, q_loss=3.77, v_loss=0.191, actor_loss=-2.49]


2025-12-07 00:56.53 [info     ] IQL_20251206235012: epoch=150 step=150000 epoch=150 metrics={'time_sample_batch': 0.004910666942596436, 'time_algorithm_update': 0.017318796396255495, 'critic_loss': 3.9635159454345703, 'q_loss': 3.77242370223999, 'v_loss': 0.19109224543720485, 'actor_loss': -2.4855453834831716, 'time_step': 0.02248417377471924, 'td_error': 2.32702976870689, 'value_scale': 4.588743043936202, 'discounted_advantage': -2.1174675888805394, 'initial_state': 6.793062686920166, 'diff_eval': 446.23666372344525} step=150000
2025-12-07 00:56.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_150000.d3


Epoch 151/200: 100%|██████████| 1000/1000 [00:24<00:00, 41.66it/s, critic_loss=4.01, q_loss=3.82, v_loss=0.198, actor_loss=-2.63]


2025-12-07 00:57.20 [info     ] IQL_20251206235012: epoch=151 step=151000 epoch=151 metrics={'time_sample_batch': 0.005006813287734986, 'time_algorithm_update': 0.01828012466430664, 'critic_loss': 4.011303946137429, 'q_loss': 3.813586546421051, 'v_loss': 0.19771740381419659, 'actor_loss': -2.6254457692056894, 'time_step': 0.02356566882133484, 'td_error': 2.526247117348178, 'value_scale': 4.106743199193392, 'discounted_advantage': -1.6594702980745442, 'initial_state': 4.322988986968994, 'diff_eval': 366.8311761793807} step=151000
2025-12-07 00:57.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_151000.d3


Epoch 152/200: 100%|██████████| 1000/1000 [00:23<00:00, 42.53it/s, critic_loss=4.07, q_loss=3.87, v_loss=0.198, actor_loss=-2.49]


2025-12-07 00:57.48 [info     ] IQL_20251206235012: epoch=152 step=152000 epoch=152 metrics={'time_sample_batch': 0.0049014151096344, 'time_algorithm_update': 0.017913920402526855, 'critic_loss': 4.060180499911309, 'q_loss': 3.862199041008949, 'v_loss': 0.19798146387189627, 'actor_loss': -2.483712688654661, 'time_step': 0.02308361530303955, 'td_error': 2.2221820621578, 'value_scale': 4.296506236762836, 'discounted_advantage': -0.7769616679213089, 'initial_state': 6.093905448913574, 'diff_eval': 530.6077501210419} step=152000
2025-12-07 00:57.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_152000.d3


Epoch 153/200: 100%|██████████| 1000/1000 [00:22<00:00, 43.88it/s, critic_loss=3.99, q_loss=3.8, v_loss=0.194, actor_loss=-2.5] 


2025-12-07 00:58.15 [info     ] IQL_20251206235012: epoch=153 step=153000 epoch=153 metrics={'time_sample_batch': 0.004726768255233765, 'time_algorithm_update': 0.017377703189849852, 'critic_loss': 3.98621419608593, 'q_loss': 3.7915510455369947, 'v_loss': 0.1946631471812725, 'actor_loss': -2.5067193346321583, 'time_step': 0.02236085820198059, 'td_error': 2.3385651998591586, 'value_scale': 4.483780146827055, 'discounted_advantage': -1.766729774873328, 'initial_state': 6.571690082550049, 'diff_eval': 414.885392475662} step=153000
2025-12-07 00:58.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_153000.d3


Epoch 154/200: 100%|██████████| 1000/1000 [00:22<00:00, 45.18it/s, critic_loss=3.94, q_loss=3.75, v_loss=0.19, actor_loss=-2.54]


2025-12-07 00:58.41 [info     ] IQL_20251206235012: epoch=154 step=154000 epoch=154 metrics={'time_sample_batch': 0.004653056144714355, 'time_algorithm_update': 0.01680621361732483, 'critic_loss': 3.9370998948812486, 'q_loss': 3.74687868475914, 'v_loss': 0.1902212065681815, 'actor_loss': -2.53970434050262, 'time_step': 0.021711341857910155, 'td_error': 2.27450436578453, 'value_scale': 4.427293145926849, 'discounted_advantage': -1.6151448119423018, 'initial_state': 5.335783004760742, 'diff_eval': 380.54560789933066} step=154000
2025-12-07 00:58.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_154000.d3


Epoch 155/200: 100%|██████████| 1000/1000 [00:23<00:00, 42.67it/s, critic_loss=4.1, q_loss=3.91, v_loss=0.195, actor_loss=-2.53]


2025-12-07 00:59.08 [info     ] IQL_20251206235012: epoch=155 step=155000 epoch=155 metrics={'time_sample_batch': 0.0047076096534729005, 'time_algorithm_update': 0.018080232858657837, 'critic_loss': 4.101781953811646, 'q_loss': 3.9063273116350175, 'v_loss': 0.19545464189350606, 'actor_loss': -2.537942173242569, 'time_step': 0.023034599304199217, 'td_error': 2.2140297864874547, 'value_scale': 4.484990334578955, 'discounted_advantage': -1.9520795673441362, 'initial_state': 7.099438667297363, 'diff_eval': 386.89965908847853} step=155000
2025-12-07 00:59.08 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_155000.d3


Epoch 156/200: 100%|██████████| 1000/1000 [00:22<00:00, 43.72it/s, critic_loss=3.99, q_loss=3.8, v_loss=0.192, actor_loss=-2.55]


2025-12-07 00:59.34 [info     ] IQL_20251206235012: epoch=156 step=156000 epoch=156 metrics={'time_sample_batch': 0.0048525543212890624, 'time_algorithm_update': 0.01730854606628418, 'critic_loss': 3.9873942139148713, 'q_loss': 3.795741248726845, 'v_loss': 0.19165296272933482, 'actor_loss': -2.552894466787577, 'time_step': 0.02244754958152771, 'td_error': 2.395838080646006, 'value_scale': 4.560091609711394, 'discounted_advantage': -2.5628913727150477, 'initial_state': 6.668245792388916, 'diff_eval': 445.31817449108445} step=156000
2025-12-07 00:59.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_156000.d3


Epoch 157/200: 100%|██████████| 1000/1000 [00:22<00:00, 43.79it/s, critic_loss=3.96, q_loss=3.76, v_loss=0.196, actor_loss=-2.49]


2025-12-07 01:00.01 [info     ] IQL_20251206235012: epoch=157 step=157000 epoch=157 metrics={'time_sample_batch': 0.004748767137527466, 'time_algorithm_update': 0.017345879077911377, 'critic_loss': 3.9641039904356004, 'q_loss': 3.767996321797371, 'v_loss': 0.1961076708212495, 'actor_loss': -2.4985909668654203, 'time_step': 0.022387450695037843, 'td_error': 2.3784575380536417, 'value_scale': 4.302138136551471, 'discounted_advantage': -1.6524930837450778, 'initial_state': 4.9557366371154785, 'diff_eval': 404.21060629487255} step=157000
2025-12-07 01:00.01 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_157000.d3


Epoch 158/200: 100%|██████████| 1000/1000 [00:23<00:00, 42.69it/s, critic_loss=4.02, q_loss=3.83, v_loss=0.197, actor_loss=-2.47]


2025-12-07 01:00.28 [info     ] IQL_20251206235012: epoch=158 step=158000 epoch=158 metrics={'time_sample_batch': 0.004757861614227295, 'time_algorithm_update': 0.017987947225570677, 'critic_loss': 4.024343015313148, 'q_loss': 3.827781341075897, 'v_loss': 0.19656167804449798, 'actor_loss': -2.47313482606411, 'time_step': 0.023026400327682495, 'td_error': 2.349768063839105, 'value_scale': 4.381620560636102, 'discounted_advantage': -2.2656132231952877, 'initial_state': 5.769944667816162, 'diff_eval': 367.8879404507218} step=158000
2025-12-07 01:00.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_158000.d3


Epoch 159/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.30it/s, critic_loss=3.96, q_loss=3.77, v_loss=0.194, actor_loss=-2.51]


2025-12-07 01:00.54 [info     ] IQL_20251206235012: epoch=159 step=159000 epoch=159 metrics={'time_sample_batch': 0.004617856740951538, 'time_algorithm_update': 0.01727240014076233, 'critic_loss': 3.9570082627534866, 'q_loss': 3.7627792498469352, 'v_loss': 0.19422900998592377, 'actor_loss': -2.5080013193041086, 'time_step': 0.02215148663520813, 'td_error': 2.247932665563288, 'value_scale': 4.479576314951715, 'discounted_advantage': -1.5408835412898028, 'initial_state': 5.962404251098633, 'diff_eval': 369.74094673252364} step=159000
2025-12-07 01:00.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_159000.d3


Epoch 160/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.25it/s, critic_loss=4.01, q_loss=3.81, v_loss=0.199, actor_loss=-2.62]


2025-12-07 01:01.20 [info     ] IQL_20251206235012: epoch=160 step=160000 epoch=160 metrics={'time_sample_batch': 0.004785288333892823, 'time_algorithm_update': 0.017144253492355346, 'critic_loss': 4.009825528860092, 'q_loss': 3.8104883556365965, 'v_loss': 0.19933717405050994, 'actor_loss': -2.6189923460483553, 'time_step': 0.02218507742881775, 'td_error': 2.3089484933534288, 'value_scale': 4.289769806824188, 'discounted_advantage': -2.001129822495383, 'initial_state': 5.894195556640625, 'diff_eval': 434.7655324891893} step=160000
2025-12-07 01:01.20 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_160000.d3


Epoch 161/200: 100%|██████████| 1000/1000 [00:23<00:00, 43.16it/s, critic_loss=3.98, q_loss=3.8, v_loss=0.186, actor_loss=-2.5] 


2025-12-07 01:01.46 [info     ] IQL_20251206235012: epoch=161 step=161000 epoch=161 metrics={'time_sample_batch': 0.004950716495513916, 'time_algorithm_update': 0.01751157569885254, 'critic_loss': 3.985058054924011, 'q_loss': 3.798642085194588, 'v_loss': 0.1864159725084901, 'actor_loss': -2.50402808073163, 'time_step': 0.022739845275878905, 'td_error': 2.242173553505503, 'value_scale': 4.641025757756668, 'discounted_advantage': -0.27114161389540525, 'initial_state': 6.915942192077637, 'diff_eval': 527.0568887945692} step=161000
2025-12-07 01:01.47 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_161000.d3


Epoch 162/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.35it/s, critic_loss=3.98, q_loss=3.78, v_loss=0.194, actor_loss=-2.57]


2025-12-07 01:02.13 [info     ] IQL_20251206235012: epoch=162 step=162000 epoch=162 metrics={'time_sample_batch': 0.00480760145187378, 'time_algorithm_update': 0.017082823514938353, 'critic_loss': 3.987095515012741, 'q_loss': 3.79266198861599, 'v_loss': 0.19443352580815554, 'actor_loss': -2.571287876486778, 'time_step': 0.022137758255004882, 'td_error': 2.5773695846672298, 'value_scale': 4.381761589771401, 'discounted_advantage': -2.9296162946105926, 'initial_state': 5.044669151306152, 'diff_eval': 449.76777876493975} step=162000
2025-12-07 01:02.13 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_162000.d3


Epoch 163/200: 100%|██████████| 1000/1000 [00:22<00:00, 45.24it/s, critic_loss=3.95, q_loss=3.75, v_loss=0.199, actor_loss=-2.65]


2025-12-07 01:02.38 [info     ] IQL_20251206235012: epoch=163 step=163000 epoch=163 metrics={'time_sample_batch': 0.004654609680175782, 'time_algorithm_update': 0.016790045738220214, 'critic_loss': 3.9467914981245995, 'q_loss': 3.7473539732694627, 'v_loss': 0.19943752823770047, 'actor_loss': -2.647473082214594, 'time_step': 0.021676022291183473, 'td_error': 2.2587311295187336, 'value_scale': 4.2411318694445095, 'discounted_advantage': -0.9463908811971996, 'initial_state': 5.65484619140625, 'diff_eval': 432.8948021749568} step=163000
2025-12-07 01:02.38 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_163000.d3


Epoch 164/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.28it/s, critic_loss=3.96, q_loss=3.75, v_loss=0.202, actor_loss=-2.6]


2025-12-07 01:03.04 [info     ] IQL_20251206235012: epoch=164 step=164000 epoch=164 metrics={'time_sample_batch': 0.004779377460479736, 'time_algorithm_update': 0.01712943959236145, 'critic_loss': 3.964708932518959, 'q_loss': 3.7627875468730925, 'v_loss': 0.20192138712853194, 'actor_loss': -2.6047965975105765, 'time_step': 0.022175229787826538, 'td_error': 2.350941620542812, 'value_scale': 4.281195748385826, 'discounted_advantage': -2.1123757941933694, 'initial_state': 5.535799503326416, 'diff_eval': 367.94655969237056} step=164000
2025-12-07 01:03.05 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_164000.d3


Epoch 165/200: 100%|██████████| 1000/1000 [00:23<00:00, 42.81it/s, critic_loss=4.03, q_loss=3.83, v_loss=0.203, actor_loss=-2.59]


2025-12-07 01:03.32 [info     ] IQL_20251206235012: epoch=165 step=165000 epoch=165 metrics={'time_sample_batch': 0.004944529533386231, 'time_algorithm_update': 0.017727628707885742, 'critic_loss': 4.028953932285309, 'q_loss': 3.826434909582138, 'v_loss': 0.20251902390271426, 'actor_loss': -2.58662298476696, 'time_step': 0.02295087933540344, 'td_error': 2.1382484654658915, 'value_scale': 4.421329712141571, 'discounted_advantage': -1.632905610941321, 'initial_state': 5.665291786193848, 'diff_eval': 413.5857899433871} step=165000
2025-12-07 01:03.32 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_165000.d3


Epoch 166/200: 100%|██████████| 1000/1000 [00:23<00:00, 43.01it/s, critic_loss=3.92, q_loss=3.72, v_loss=0.199, actor_loss=-2.55]


2025-12-07 01:03.58 [info     ] IQL_20251206235012: epoch=166 step=166000 epoch=166 metrics={'time_sample_batch': 0.004903666973114014, 'time_algorithm_update': 0.017637162208557128, 'critic_loss': 3.9233497469425203, 'q_loss': 3.7249020853042603, 'v_loss': 0.19844766408205033, 'actor_loss': -2.553051541924477, 'time_step': 0.02282551383972168, 'td_error': 2.348960998718767, 'value_scale': 4.312395030231748, 'discounted_advantage': -2.3339672643211196, 'initial_state': 5.371719837188721, 'diff_eval': 381.718288083605} step=166000
2025-12-07 01:03.59 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_166000.d3


Epoch 167/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.28it/s, critic_loss=3.97, q_loss=3.77, v_loss=0.198, actor_loss=-2.65]


2025-12-07 01:04.24 [info     ] IQL_20251206235012: epoch=167 step=167000 epoch=167 metrics={'time_sample_batch': 0.004789051532745362, 'time_algorithm_update': 0.017088416576385497, 'critic_loss': 3.9645177971124648, 'q_loss': 3.766439505338669, 'v_loss': 0.1980782970264554, 'actor_loss': -2.6519130167365073, 'time_step': 0.02214334464073181, 'td_error': 2.4397850924415887, 'value_scale': 4.355408393963703, 'discounted_advantage': -2.1321111645109077, 'initial_state': 5.541501522064209, 'diff_eval': 435.8790459662326} step=167000
2025-12-07 01:04.25 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_167000.d3


Epoch 168/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.54it/s, critic_loss=3.96, q_loss=3.77, v_loss=0.198, actor_loss=-2.65]


2025-12-07 01:04.50 [info     ] IQL_20251206235012: epoch=168 step=168000 epoch=168 metrics={'time_sample_batch': 0.004745451688766479, 'time_algorithm_update': 0.01696275496482849, 'critic_loss': 3.957255042552948, 'q_loss': 3.7595486282110215, 'v_loss': 0.19770642168074845, 'actor_loss': -2.644061017602682, 'time_step': 0.02198863935470581, 'td_error': 2.2963327124343578, 'value_scale': 4.506677690543678, 'discounted_advantage': -2.7314573202536776, 'initial_state': 6.495070934295654, 'diff_eval': 406.28016523394325} step=168000
2025-12-07 01:04.51 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_168000.d3


Epoch 169/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.64it/s, critic_loss=3.89, q_loss=3.69, v_loss=0.198, actor_loss=-2.71]


2025-12-07 01:05.16 [info     ] IQL_20251206235012: epoch=169 step=169000 epoch=169 metrics={'time_sample_batch': 0.004699388027191162, 'time_algorithm_update': 0.016978927850723267, 'critic_loss': 3.890836540341377, 'q_loss': 3.6928229454755783, 'v_loss': 0.1980135973468423, 'actor_loss': -2.704157511621714, 'time_step': 0.02196884822845459, 'td_error': 2.3948687819016183, 'value_scale': 4.479237182495986, 'discounted_advantage': -2.7894265095955184, 'initial_state': 6.055956840515137, 'diff_eval': 485.44187403258263} step=169000
2025-12-07 01:05.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_169000.d3


Epoch 170/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.51it/s, critic_loss=3.95, q_loss=3.75, v_loss=0.199, actor_loss=-2.63]


2025-12-07 01:05.42 [info     ] IQL_20251206235012: epoch=170 step=170000 epoch=170 metrics={'time_sample_batch': 0.0047360677719116215, 'time_algorithm_update': 0.016976428031921387, 'critic_loss': 3.9475296460390092, 'q_loss': 3.748305223941803, 'v_loss': 0.19922441577911376, 'actor_loss': -2.6238510624170304, 'time_step': 0.022002942085266115, 'td_error': 2.491693989765637, 'value_scale': 4.1292195694219735, 'discounted_advantage': -2.4282964335010986, 'initial_state': 4.2031636238098145, 'diff_eval': 363.93933312110795} step=170000
2025-12-07 01:05.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_170000.d3


Epoch 171/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.01it/s, critic_loss=3.93, q_loss=3.72, v_loss=0.206, actor_loss=-2.64]


2025-12-07 01:06.08 [info     ] IQL_20251206235012: epoch=171 step=171000 epoch=171 metrics={'time_sample_batch': 0.004808591365814209, 'time_algorithm_update': 0.017198027849197387, 'critic_loss': 3.922943747162819, 'q_loss': 3.717518921971321, 'v_loss': 0.2054248274192214, 'actor_loss': -2.6367573034763336, 'time_step': 0.022280914545059205, 'td_error': 2.0712071092760067, 'value_scale': 4.137097400561606, 'discounted_advantage': -1.9218910432365177, 'initial_state': 6.090670108795166, 'diff_eval': 384.0162993658889} step=171000
2025-12-07 01:06.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_171000.d3


Epoch 172/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.81it/s, critic_loss=3.89, q_loss=3.69, v_loss=0.2, actor_loss=-2.59] 


2025-12-07 01:06.34 [info     ] IQL_20251206235012: epoch=172 step=172000 epoch=172 metrics={'time_sample_batch': 0.004744192361831665, 'time_algorithm_update': 0.01689310026168823, 'critic_loss': 3.893192533016205, 'q_loss': 3.6930259172916413, 'v_loss': 0.20016660760343075, 'actor_loss': -2.5867643037438395, 'time_step': 0.02190673542022705, 'td_error': 2.1332230973090427, 'value_scale': 4.3069129240365776, 'discounted_advantage': -0.6085142774212043, 'initial_state': 6.162493705749512, 'diff_eval': 467.47356854729173} step=172000
2025-12-07 01:06.35 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_172000.d3


Epoch 173/200: 100%|██████████| 1000/1000 [00:23<00:00, 43.34it/s, critic_loss=3.96, q_loss=3.75, v_loss=0.206, actor_loss=-2.66]


2025-12-07 01:07.01 [info     ] IQL_20251206235012: epoch=173 step=173000 epoch=173 metrics={'time_sample_batch': 0.004904248952865601, 'time_algorithm_update': 0.01746225357055664, 'critic_loss': 3.955432972073555, 'q_loss': 3.7496771771907804, 'v_loss': 0.20575579268485308, 'actor_loss': -2.6632215920090676, 'time_step': 0.022645238161087038, 'td_error': 2.290900242522985, 'value_scale': 4.141843987891118, 'discounted_advantage': -1.1530940248837276, 'initial_state': 6.232905387878418, 'diff_eval': 382.17707618183897} step=173000
2025-12-07 01:07.01 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_173000.d3


Epoch 174/200: 100%|██████████| 1000/1000 [00:23<00:00, 42.84it/s, critic_loss=3.92, q_loss=3.73, v_loss=0.199, actor_loss=-2.66]


2025-12-07 01:07.28 [info     ] IQL_20251206235012: epoch=174 step=174000 epoch=174 metrics={'time_sample_batch': 0.004948890686035156, 'time_algorithm_update': 0.017662513017654417, 'critic_loss': 3.9201745454669, 'q_loss': 3.721488993227482, 'v_loss': 0.19868555433303117, 'actor_loss': -2.6608974092304707, 'time_step': 0.02290235710144043, 'td_error': 2.2851746497381478, 'value_scale': 4.563029479217651, 'discounted_advantage': -1.7996481100306263, 'initial_state': 6.072843551635742, 'diff_eval': 370.1792828319401} step=174000
2025-12-07 01:07.28 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_174000.d3


Epoch 175/200: 100%|██████████| 1000/1000 [00:22<00:00, 45.24it/s, critic_loss=3.96, q_loss=3.75, v_loss=0.206, actor_loss=-2.66]


2025-12-07 01:07.53 [info     ] IQL_20251206235012: epoch=175 step=175000 epoch=175 metrics={'time_sample_batch': 0.00458533763885498, 'time_algorithm_update': 0.016855087995529174, 'critic_loss': 3.960782561540604, 'q_loss': 3.7550655254125593, 'v_loss': 0.20571703392267227, 'actor_loss': -2.662820094972849, 'time_step': 0.021687211275100707, 'td_error': 2.447860789502172, 'value_scale': 4.444636191674707, 'discounted_advantage': -1.415872446955028, 'initial_state': 5.4632182121276855, 'diff_eval': 436.1359089757208} step=175000
2025-12-07 01:07.54 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_175000.d3


Epoch 176/200: 100%|██████████| 1000/1000 [00:22<00:00, 43.49it/s, critic_loss=3.98, q_loss=3.77, v_loss=0.208, actor_loss=-2.6]


2025-12-07 01:08.21 [info     ] IQL_20251206235012: epoch=176 step=176000 epoch=176 metrics={'time_sample_batch': 0.004825823068618775, 'time_algorithm_update': 0.017464500427246094, 'critic_loss': 3.979000936985016, 'q_loss': 3.7709174035787583, 'v_loss': 0.20808354087918995, 'actor_loss': -2.596463671758771, 'time_step': 0.022563140630722046, 'td_error': 2.094941273110665, 'value_scale': 4.4300546637342615, 'discounted_advantage': -1.6878475122837746, 'initial_state': 6.716757774353027, 'diff_eval': 366.61819947043244} step=176000
2025-12-07 01:08.21 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_176000.d3


Epoch 177/200: 100%|██████████| 1000/1000 [00:23<00:00, 42.60it/s, critic_loss=3.95, q_loss=3.75, v_loss=0.204, actor_loss=-2.73]


2025-12-07 01:08.48 [info     ] IQL_20251206235012: epoch=177 step=177000 epoch=177 metrics={'time_sample_batch': 0.004768301010131836, 'time_algorithm_update': 0.018023538112640382, 'critic_loss': 3.94692724609375, 'q_loss': 3.7428676331043245, 'v_loss': 0.20405961456149815, 'actor_loss': -2.720959197640419, 'time_step': 0.02304939365386963, 'td_error': 2.3299425078243883, 'value_scale': 4.416604463470052, 'discounted_advantage': -2.1454320347974387, 'initial_state': 6.781556606292725, 'diff_eval': 460.22999317379487} step=177000
2025-12-07 01:08.48 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_177000.d3


Epoch 178/200: 100%|██████████| 1000/1000 [00:23<00:00, 43.22it/s, critic_loss=3.9, q_loss=3.69, v_loss=0.205, actor_loss=-2.7] 


2025-12-07 01:09.14 [info     ] IQL_20251206235012: epoch=178 step=178000 epoch=178 metrics={'time_sample_batch': 0.004873006105422973, 'time_algorithm_update': 0.01757742166519165, 'critic_loss': 3.9030815114974975, 'q_loss': 3.6977805244922637, 'v_loss': 0.20530098385363818, 'actor_loss': -2.701203324109316, 'time_step': 0.02271253514289856, 'td_error': 2.309885531461652, 'value_scale': 4.1018852155721195, 'discounted_advantage': -1.2580994855307464, 'initial_state': 5.088369846343994, 'diff_eval': 365.0715025665834} step=178000
2025-12-07 01:09.14 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_178000.d3


Epoch 179/200: 100%|██████████| 1000/1000 [00:23<00:00, 43.33it/s, critic_loss=4.01, q_loss=3.8, v_loss=0.203, actor_loss=-2.58]


2025-12-07 01:09.41 [info     ] IQL_20251206235012: epoch=179 step=179000 epoch=179 metrics={'time_sample_batch': 0.0049796862602233885, 'time_algorithm_update': 0.017403812885284423, 'critic_loss': 4.005669564723968, 'q_loss': 3.8027874618172643, 'v_loss': 0.20288210137188434, 'actor_loss': -2.580985607653856, 'time_step': 0.022666088581085205, 'td_error': 2.35594428750807, 'value_scale': 4.429600344643923, 'discounted_advantage': -2.6622617457516724, 'initial_state': 5.982416152954102, 'diff_eval': 465.5035657047773} step=179000
2025-12-07 01:09.41 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_179000.d3


Epoch 180/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.52it/s, critic_loss=3.97, q_loss=3.76, v_loss=0.21, actor_loss=-2.79]


2025-12-07 01:10.07 [info     ] IQL_20251206235012: epoch=180 step=180000 epoch=180 metrics={'time_sample_batch': 0.00478007173538208, 'time_algorithm_update': 0.017014838695526124, 'critic_loss': 3.9739519292116166, 'q_loss': 3.7639174609184267, 'v_loss': 0.21003446850180627, 'actor_loss': -2.7924600235521795, 'time_step': 0.02205045509338379, 'td_error': 2.4685996427114234, 'value_scale': 4.3718486266973375, 'discounted_advantage': -1.7949246180710918, 'initial_state': 5.140383243560791, 'diff_eval': 342.297549657986} step=180000
2025-12-07 01:10.07 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_180000.d3


Epoch 181/200: 100%|██████████| 1000/1000 [00:23<00:00, 42.08it/s, critic_loss=3.86, q_loss=3.66, v_loss=0.206, actor_loss=-2.67]


2025-12-07 01:10.34 [info     ] IQL_20251206235012: epoch=181 step=181000 epoch=181 metrics={'time_sample_batch': 0.005830845594406128, 'time_algorithm_update': 0.017175940990448, 'critic_loss': 3.8682188277244567, 'q_loss': 3.6626066423654557, 'v_loss': 0.20561218425631522, 'actor_loss': -2.6690446498394014, 'time_step': 0.02326158618927002, 'td_error': 2.293238761920834, 'value_scale': 4.415411857191953, 'discounted_advantage': -1.9259209174855472, 'initial_state': 7.220266342163086, 'diff_eval': 417.1284421394059} step=181000
2025-12-07 01:10.34 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_181000.d3


Epoch 182/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.51it/s, critic_loss=3.88, q_loss=3.67, v_loss=0.21, actor_loss=-2.54]


2025-12-07 01:11.00 [info     ] IQL_20251206235012: epoch=182 step=182000 epoch=182 metrics={'time_sample_batch': 0.004757669448852539, 'time_algorithm_update': 0.017035707235336303, 'critic_loss': 3.880375842690468, 'q_loss': 3.6706632165908815, 'v_loss': 0.2097126211002469, 'actor_loss': -2.537053833603859, 'time_step': 0.022056710481643677, 'td_error': 2.288526026873911, 'value_scale': 4.500550212399068, 'discounted_advantage': -1.2341486388221854, 'initial_state': 6.831788539886475, 'diff_eval': 453.4243001410445} step=182000
2025-12-07 01:11.00 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_182000.d3


Epoch 183/200: 100%|██████████| 1000/1000 [00:22<00:00, 43.53it/s, critic_loss=3.94, q_loss=3.73, v_loss=0.208, actor_loss=-2.68]


2025-12-07 01:11.27 [info     ] IQL_20251206235012: epoch=183 step=183000 epoch=183 metrics={'time_sample_batch': 0.0049004385471343994, 'time_algorithm_update': 0.017369506120681763, 'critic_loss': 3.9424802955389024, 'q_loss': 3.7340037996768953, 'v_loss': 0.20847649136185645, 'actor_loss': -2.679144535303116, 'time_step': 0.022544862270355223, 'td_error': 2.189193246960602, 'value_scale': 4.2503715436679235, 'discounted_advantage': -1.7563574940600932, 'initial_state': 6.0920586585998535, 'diff_eval': 384.74505814038764} step=183000
2025-12-07 01:11.27 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_183000.d3


Epoch 184/200: 100%|██████████| 1000/1000 [00:22<00:00, 43.73it/s, critic_loss=3.84, q_loss=3.63, v_loss=0.208, actor_loss=-2.79]


2025-12-07 01:11.53 [info     ] IQL_20251206235012: epoch=184 step=184000 epoch=184 metrics={'time_sample_batch': 0.004772903442382813, 'time_algorithm_update': 0.017386464595794678, 'critic_loss': 3.8447187502384184, 'q_loss': 3.6365243111252785, 'v_loss': 0.20819443994015455, 'actor_loss': -2.7904137049913404, 'time_step': 0.02242730736732483, 'td_error': 2.4294779076549853, 'value_scale': 4.485681289973437, 'discounted_advantage': -2.397887810338518, 'initial_state': 5.579646110534668, 'diff_eval': 368.53675706897445} step=184000
2025-12-07 01:11.53 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_184000.d3


Epoch 185/200: 100%|██████████| 1000/1000 [00:22<00:00, 43.94it/s, critic_loss=3.94, q_loss=3.73, v_loss=0.213, actor_loss=-2.75]


2025-12-07 01:12.19 [info     ] IQL_20251206235012: epoch=185 step=185000 epoch=185 metrics={'time_sample_batch': 0.004797303676605224, 'time_algorithm_update': 0.017280972719192506, 'critic_loss': 3.948151986002922, 'q_loss': 3.7351481705904006, 'v_loss': 0.2130038196593523, 'actor_loss': -2.7465975515544416, 'time_step': 0.022334929227828978, 'td_error': 2.2576746409773283, 'value_scale': 4.370515684322403, 'discounted_advantage': -2.437048974631397, 'initial_state': 6.599380970001221, 'diff_eval': 378.2514246423975} step=185000
2025-12-07 01:12.19 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_185000.d3


Epoch 186/200: 100%|██████████| 1000/1000 [00:22<00:00, 43.61it/s, critic_loss=3.86, q_loss=3.66, v_loss=0.208, actor_loss=-2.74]


2025-12-07 01:12.46 [info     ] IQL_20251206235012: epoch=186 step=186000 epoch=186 metrics={'time_sample_batch': 0.0048810384273529055, 'time_algorithm_update': 0.017383688926696776, 'critic_loss': 3.8600239424705505, 'q_loss': 3.6518024917244913, 'v_loss': 0.20822145168483258, 'actor_loss': -2.7430612140595914, 'time_step': 0.022515610694885255, 'td_error': 2.148080713225696, 'value_scale': 4.38576428244696, 'discounted_advantage': -2.1174875186555107, 'initial_state': 6.848641395568848, 'diff_eval': 344.7147421051837} step=186000
2025-12-07 01:12.46 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_186000.d3


Epoch 187/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.73it/s, critic_loss=3.81, q_loss=3.6, v_loss=0.21, actor_loss=-2.72] 


2025-12-07 01:13.12 [info     ] IQL_20251206235012: epoch=187 step=187000 epoch=187 metrics={'time_sample_batch': 0.004723326683044433, 'time_algorithm_update': 0.016953370571136474, 'critic_loss': 3.807116925120354, 'q_loss': 3.5974935303926467, 'v_loss': 0.20962339527904988, 'actor_loss': -2.715965920343995, 'time_step': 0.021935222148895265, 'td_error': 2.382244921116579, 'value_scale': 4.407938897174885, 'discounted_advantage': -2.166593135573346, 'initial_state': 5.933211803436279, 'diff_eval': 399.32715548338786} step=187000
2025-12-07 01:13.12 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_187000.d3


Epoch 188/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.85it/s, critic_loss=3.79, q_loss=3.59, v_loss=0.201, actor_loss=-2.8]


2025-12-07 01:13.38 [info     ] IQL_20251206235012: epoch=188 step=188000 epoch=188 metrics={'time_sample_batch': 0.004734558343887329, 'time_algorithm_update': 0.016905436992645264, 'critic_loss': 3.7932144569158552, 'q_loss': 3.592589115381241, 'v_loss': 0.20062533834576607, 'actor_loss': -2.7966733553260563, 'time_step': 0.02188770055770874, 'td_error': 2.2985464171812633, 'value_scale': 4.34819456738711, 'discounted_advantage': -1.8811535562296324, 'initial_state': 6.390115737915039, 'diff_eval': 383.3247320388197} step=188000
2025-12-07 01:13.38 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_188000.d3


Epoch 189/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.01it/s, critic_loss=3.94, q_loss=3.73, v_loss=0.215, actor_loss=-2.79]


2025-12-07 01:14.04 [info     ] IQL_20251206235012: epoch=189 step=189000 epoch=189 metrics={'time_sample_batch': 0.004835854053497315, 'time_algorithm_update': 0.01720557165145874, 'critic_loss': 3.9428596894741057, 'q_loss': 3.7285766706466674, 'v_loss': 0.2142830156981945, 'actor_loss': -2.7834816311001775, 'time_step': 0.022302791357040407, 'td_error': 2.169406987294035, 'value_scale': 4.47824339344313, 'discounted_advantage': -2.5874920231852974, 'initial_state': 6.594350814819336, 'diff_eval': 401.79966732930376} step=189000
2025-12-07 01:14.04 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_189000.d3


Epoch 190/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.14it/s, critic_loss=3.82, q_loss=3.61, v_loss=0.212, actor_loss=-2.77]


2025-12-07 01:14.30 [info     ] IQL_20251206235012: epoch=190 step=190000 epoch=190 metrics={'time_sample_batch': 0.0048552780151367185, 'time_algorithm_update': 0.017045511960983275, 'critic_loss': 3.825525949716568, 'q_loss': 3.614117115020752, 'v_loss': 0.21140883829444646, 'actor_loss': -2.7679632361978292, 'time_step': 0.022199729919433593, 'td_error': 2.290982346987421, 'value_scale': 4.224643931094142, 'discounted_advantage': -1.16200260341566, 'initial_state': 5.595499038696289, 'diff_eval': 404.7918214143579} step=190000
2025-12-07 01:14.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_190000.d3


Epoch 191/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.18it/s, critic_loss=3.77, q_loss=3.57, v_loss=0.205, actor_loss=-2.62]


2025-12-07 01:14.56 [info     ] IQL_20251206235012: epoch=191 step=191000 epoch=191 metrics={'time_sample_batch': 0.0047740466594696045, 'time_algorithm_update': 0.017142905712127687, 'critic_loss': 3.7748722469806673, 'q_loss': 3.569409376502037, 'v_loss': 0.20546287143975497, 'actor_loss': -2.620575914591551, 'time_step': 0.022186487913131714, 'td_error': 2.366491530832449, 'value_scale': 4.3003175787944645, 'discounted_advantage': -1.9221202659088406, 'initial_state': 5.336939811706543, 'diff_eval': 365.80420531508946} step=191000
2025-12-07 01:14.56 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_191000.d3


Epoch 192/200: 100%|██████████| 1000/1000 [00:23<00:00, 43.38it/s, critic_loss=3.91, q_loss=3.69, v_loss=0.216, actor_loss=-2.84]


2025-12-07 01:15.23 [info     ] IQL_20251206235012: epoch=192 step=192000 epoch=192 metrics={'time_sample_batch': 0.004895926713943482, 'time_algorithm_update': 0.01743511199951172, 'critic_loss': 3.90507093167305, 'q_loss': 3.6896708176136017, 'v_loss': 0.21540011191368102, 'actor_loss': -2.833178164333105, 'time_step': 0.02261484956741333, 'td_error': 2.653493854180939, 'value_scale': 4.226072706833672, 'discounted_advantage': -2.4297312649467067, 'initial_state': 4.84774923324585, 'diff_eval': 383.68875784161133} step=192000
2025-12-07 01:15.23 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_192000.d3


Epoch 193/200: 100%|██████████| 1000/1000 [00:22<00:00, 43.75it/s, critic_loss=3.76, q_loss=3.56, v_loss=0.206, actor_loss=-2.78]


2025-12-07 01:15.49 [info     ] IQL_20251206235012: epoch=193 step=193000 epoch=193 metrics={'time_sample_batch': 0.00488362717628479, 'time_algorithm_update': 0.017188618659973145, 'critic_loss': 3.7626033750772474, 'q_loss': 3.5564845831394196, 'v_loss': 0.2061187913566828, 'actor_loss': -2.7830763275623323, 'time_step': 0.022369366645812988, 'td_error': 2.297962862387912, 'value_scale': 4.200930055853039, 'discounted_advantage': -1.764167429236462, 'initial_state': 5.628015041351318, 'diff_eval': 368.23913319154593} step=193000
2025-12-07 01:15.49 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_193000.d3


Epoch 194/200: 100%|██████████| 1000/1000 [00:23<00:00, 43.40it/s, critic_loss=3.91, q_loss=3.7, v_loss=0.21, actor_loss=-2.78] 


2025-12-07 01:16.15 [info     ] IQL_20251206235012: epoch=194 step=194000 epoch=194 metrics={'time_sample_batch': 0.004991114139556885, 'time_algorithm_update': 0.017338785886764525, 'critic_loss': 3.9036666051149367, 'q_loss': 3.6941202251911163, 'v_loss': 0.20954637914150953, 'actor_loss': -2.779492652222514, 'time_step': 0.02260839295387268, 'td_error': 2.200540808147224, 'value_scale': 4.471759941335221, 'discounted_advantage': -1.8226723502624806, 'initial_state': 6.555215835571289, 'diff_eval': 346.3879846452463} step=194000
2025-12-07 01:16.16 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_194000.d3


Epoch 195/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.69it/s, critic_loss=3.89, q_loss=3.68, v_loss=0.213, actor_loss=-2.77]


2025-12-07 01:16.42 [info     ] IQL_20251206235012: epoch=195 step=195000 epoch=195 metrics={'time_sample_batch': 0.004688756704330444, 'time_algorithm_update': 0.017016754865646362, 'critic_loss': 3.89174693107605, 'q_loss': 3.679062092065811, 'v_loss': 0.21268484049290418, 'actor_loss': -2.769662348181009, 'time_step': 0.021968326330184938, 'td_error': 2.2028890781957657, 'value_scale': 4.376096544187744, 'discounted_advantage': -2.234166265134097, 'initial_state': 6.32963752746582, 'diff_eval': 338.41836650773024} step=195000
2025-12-07 01:16.42 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_195000.d3


Epoch 196/200: 100%|██████████| 1000/1000 [00:23<00:00, 42.48it/s, critic_loss=3.89, q_loss=3.68, v_loss=0.209, actor_loss=-2.91]


2025-12-07 01:17.09 [info     ] IQL_20251206235012: epoch=196 step=196000 epoch=196 metrics={'time_sample_batch': 0.005807228326797485, 'time_algorithm_update': 0.017077884674072265, 'critic_loss': 3.883005877137184, 'q_loss': 3.6734848878383635, 'v_loss': 0.2095209913253784, 'actor_loss': -2.9078404814302923, 'time_step': 0.023145484924316406, 'td_error': 2.2617208336883663, 'value_scale': 4.558241844278341, 'discounted_advantage': -1.9373192542526936, 'initial_state': 6.206298828125, 'diff_eval': 418.2396407684577} step=196000
2025-12-07 01:17.09 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_196000.d3


Epoch 197/200: 100%|██████████| 1000/1000 [00:23<00:00, 42.51it/s, critic_loss=3.84, q_loss=3.62, v_loss=0.214, actor_loss=-2.75]


2025-12-07 01:17.36 [info     ] IQL_20251206235012: epoch=197 step=197000 epoch=197 metrics={'time_sample_batch': 0.004678730249404907, 'time_algorithm_update': 0.018165364265441894, 'critic_loss': 3.8327079198360443, 'q_loss': 3.6193439178466797, 'v_loss': 0.2133640018776059, 'actor_loss': -2.7521791622042655, 'time_step': 0.023105173826217653, 'td_error': 2.2821964780126742, 'value_scale': 4.361569968469859, 'discounted_advantage': -2.5126964614267906, 'initial_state': 6.3569464683532715, 'diff_eval': 384.4505224062202} step=197000
2025-12-07 01:17.36 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_197000.d3


Epoch 198/200: 100%|██████████| 1000/1000 [00:22<00:00, 44.57it/s, critic_loss=3.81, q_loss=3.6, v_loss=0.214, actor_loss=-2.93]


2025-12-07 01:18.02 [info     ] IQL_20251206235012: epoch=198 step=198000 epoch=198 metrics={'time_sample_batch': 0.0047635862827301026, 'time_algorithm_update': 0.017011022806167602, 'critic_loss': 3.8095924127697947, 'q_loss': 3.59548973608017, 'v_loss': 0.21410268051177264, 'actor_loss': -2.9287909581959246, 'time_step': 0.02202396869659424, 'td_error': 2.2348912399623493, 'value_scale': 4.600328320220475, 'discounted_advantage': -2.418209725765147, 'initial_state': 6.575725078582764, 'diff_eval': 383.4359061903748} step=198000
2025-12-07 01:18.02 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_198000.d3


Epoch 199/200: 100%|██████████| 1000/1000 [00:23<00:00, 42.48it/s, critic_loss=3.82, q_loss=3.6, v_loss=0.215, actor_loss=-2.73]


2025-12-07 01:18.30 [info     ] IQL_20251206235012: epoch=199 step=199000 epoch=199 metrics={'time_sample_batch': 0.004743489980697632, 'time_algorithm_update': 0.018152740955352784, 'critic_loss': 3.8161244480609895, 'q_loss': 3.6008103934526443, 'v_loss': 0.21531405185163022, 'actor_loss': -2.7327598309516907, 'time_step': 0.02316284990310669, 'td_error': 2.128642262860288, 'value_scale': 4.3788001049697955, 'discounted_advantage': -1.897362978628612, 'initial_state': 6.328591823577881, 'diff_eval': 369.6914758619569} step=199000
2025-12-07 01:18.30 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_199000.d3


Epoch 200/200: 100%|██████████| 1000/1000 [00:24<00:00, 40.26it/s, critic_loss=3.9, q_loss=3.68, v_loss=0.22, actor_loss=-2.8]  


2025-12-07 01:18.58 [info     ] IQL_20251206235012: epoch=200 step=200000 epoch=200 metrics={'time_sample_batch': 0.006180122852325439, 'time_algorithm_update': 0.017983187913894652, 'critic_loss': 3.9050112788677214, 'q_loss': 3.6844584316015245, 'v_loss': 0.22055284932255745, 'actor_loss': -2.8101650324761867, 'time_step': 0.024439037322998045, 'td_error': 2.0670665125998755, 'value_scale': 4.37828069248499, 'discounted_advantage': -1.7019425175642138, 'initial_state': 6.083855152130127, 'diff_eval': 352.70337413104585} step=200000
2025-12-07 01:18.58 [info     ] Model parameters are saved to logs/d3rlpy_logs\IQL_20251206235012\model_200000.d3
