Import the required libraries

In [1]:
from d3rlpy.datasets import get_cartpole
from d3rlpy.algos import DiscreteCQL, DQN
from d3rlpy.metrics.scorer import discounted_sum_of_advantage_scorer
from d3rlpy.metrics.scorer import evaluate_on_environment
from d3rlpy.dataset import Episode
from d3rlpy.dataset import MDPDataset

from d3rlpy.metrics.scorer import td_error_scorer
from d3rlpy.metrics.scorer import average_value_estimation_scorer
from sklearn.model_selection import train_test_split

import import_ipynb
import numpy as np
from random import random
from create_dataset import CreateDataset
from FootballEnv import FootballEnv

importing Jupyter notebook from FootballEnv.ipynb
(12, 16)
(5,)


Helper function to create the dataset from the data in `data.json`

In [6]:
def create_dataset(data_path='data.json', episode_length=5):
    """Build an ``MDPDataset`` from the data stored in a JSON file.

    The file is loaded through ``CreateDataset`` and the result of its
    ``createEpisodeDataset()`` call supplies the observations, actions and
    rewards. Terminal flags are generated here: one row per entry of
    ``observations``, each row being ``episode_length - 1`` zeros followed by
    a single one (``[0, 0, 0, 0, 1]`` with the default length).

    Args:
        data_path: file handed to ``CreateDataset.loadFile``. Defaults to
            ``'data.json'``, the path that used to be hard-coded.
        episode_length: number of flags in each terminal row; only the last
            flag is set. Defaults to 5, the length that used to be hard-coded.

    Returns:
        The ``MDPDataset`` constructed from the loaded arrays and the
        generated terminal flags.

    Raises:
        ValueError: if ``episode_length`` is smaller than 1.
    """
    if episode_length < 1:
        raise ValueError('episode_length must be at least 1')

    dataset_maker = CreateDataset()
    dataset_maker.loadFile(data_path)

    observations, actions, rewards = dataset_maker.createEpisodeDataset()

    # Every row ends with the terminal flag, i.e. the last step closes the row.
    terminal_row = [0] * (episode_length - 1) + [1]
    # NOTE(review): this yields a 2-D array with one row per entry of
    # `observations`. Confirm that createEpisodeDataset() returns data laid
    # out to match, and how MDPDataset treats non-1-D terminals.
    terminals = np.array([terminal_row for _ in range(len(observations))])

    return MDPDataset(
        observations,
        actions,
        rewards,
        terminals,
    )

In [7]:
# Fixed seed so the train/test partition is the same on every
# Restart & Run All; without it train_test_split reshuffles on each run
# and the evaluation metrics are not comparable between runs.
SPLIT_SEED = 42

dataset = create_dataset()

# Hold out 20% of the dataset for evaluation during training.
train_episodes, test_episodes = train_test_split(
    dataset,
    test_size=0.2,
    random_state=SPLIT_SEED,
)

(12, 16)


In [11]:
# Discrete CQL learner, configured to run without a GPU.
cql = DiscreteCQL(use_gpu=False)

# Environment instance handed to the environment-based scorer below.
env = FootballEnv()

# Metrics reported for every epoch, keyed by the name used in the logs.
eval_scorers = {
    # evaluate with Football Env
    'environment': evaluate_on_environment(env),
    # smaller is better
    'advantage': discounted_sum_of_advantage_scorer,
    # smaller is better
    'td_error': td_error_scorer,
    # smaller is better
    'value_scale': average_value_estimation_scorer,
}

# Train for 50 epochs on the training split, scoring on the held-out split.
output = cql.fit(
    train_episodes,
    eval_episodes=test_episodes,
    n_epochs=50,
    scorers=eval_scorers,
)

(12, 16)
2022-02-06 19:40.04 [debug    ] RoundIterator is selected.
2022-02-06 19:40.04 [info     ] Directory is created at d3rlpy_logs\DiscreteCQL_20220206194004
2022-02-06 19:40.04 [debug    ] Building models...
2022-02-06 19:40.04 [debug    ] Models have been built.
2022-02-06 19:40.04 [info     ] Parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\params.json params={'action_scaler': None, 'alpha': 1.0, 'batch_size': 32, 'encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'gamma': 0.99, 'generated_maxlen': 100000, 'learning_rate': 6.25e-05, 'n_critics': 1, 'n_frames': 1, 'n_steps': 1, 'optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'q_func_factory': {'type': 'mean', 'params': {'bootstrap': False, 'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': None, 'target_reduction_type': 'min', 'target_update_interval': 80

Epoch 1/50: 100%|██████████| 39/39 [00:00<00:00, 117.42it/s, loss=0.97]


2022-02-06 19:40.05 [info     ] DiscreteCQL_20220206194004: epoch=1 step=39 epoch=1 metrics={'time_sample_batch': 9.464606260642027e-05, 'time_algorithm_update': 0.008133221895266801, 'loss': 0.9401729412567921, 'time_step': 0.008381085518078927, 'environment': 2.776230746130838, 'advantage': 0.0, 'td_error': 0.03976752685707652, 'value_scale': 0.39979344606399536} step=39
2022-02-06 19:40.05 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_39.pt


Epoch 2/50: 100%|██████████| 39/39 [00:00<00:00, 121.08it/s, loss=0.737]


2022-02-06 19:40.06 [info     ] DiscreteCQL_20220206194004: epoch=2 step=78 epoch=2 metrics={'time_sample_batch': 0.00023157779987041766, 'time_algorithm_update': 0.007747919131547977, 'loss': 0.7230974145424671, 'time_step': 0.008157045413286258, 'environment': 2.2611585221115154, 'advantage': 0.0, 'td_error': 0.0549387208875487, 'value_scale': 0.4698936939239502} step=78
2022-02-06 19:40.06 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_78.pt


Epoch 3/50: 100%|██████████| 39/39 [00:00<00:00, 132.65it/s, loss=0.61] 


2022-02-06 19:40.07 [info     ] DiscreteCQL_20220206194004: epoch=3 step=117 epoch=3 metrics={'time_sample_batch': 0.00015327869317470453, 'time_algorithm_update': 0.0070517613337590145, 'loss': 0.5984340631044828, 'time_step': 0.007410391783102965, 'environment': 2.411861137378742, 'advantage': 0.0, 'td_error': 0.04674355626457327, 'value_scale': 0.4334442913532257} step=117
2022-02-06 19:40.07 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_117.pt


Epoch 4/50: 100%|██████████| 39/39 [00:00<00:00, 97.02it/s, loss=0.5]  


2022-02-06 19:40.08 [info     ] DiscreteCQL_20220206194004: epoch=4 step=156 epoch=4 metrics={'time_sample_batch': 0.0002051683572622446, 'time_algorithm_update': 0.009692840087108122, 'loss': 0.489152131936489, 'time_step': 0.010127966220562275, 'environment': 2.5933801946768997, 'advantage': 0.0, 'td_error': 0.034322526197001935, 'value_scale': 0.37140321731567383} step=156
2022-02-06 19:40.08 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_156.pt


Epoch 5/50: 100%|██████████| 39/39 [00:00<00:00, 65.82it/s, loss=0.396]


2022-02-06 19:40.09 [info     ] DiscreteCQL_20220206194004: epoch=5 step=195 epoch=5 metrics={'time_sample_batch': 0.0002308014111641126, 'time_algorithm_update': 0.014656299199813452, 'loss': 0.3859499387252025, 'time_step': 0.015039975826556865, 'environment': 2.787614056085938, 'advantage': 0.0, 'td_error': 0.02402889538279851, 'value_scale': 0.3106756657361984} step=195
2022-02-06 19:40.09 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_195.pt


Epoch 6/50: 100%|██████████| 39/39 [00:00<00:00, 131.84it/s, loss=0.302]


2022-02-06 19:40.09 [info     ] DiscreteCQL_20220206194004: epoch=6 step=234 epoch=6 metrics={'time_sample_batch': 0.00012875825930864384, 'time_algorithm_update': 0.0071522272550142724, 'loss': 0.2928946973421635, 'time_step': 0.007433616198026217, 'environment': 2.4024796349327255, 'advantage': 0.0, 'td_error': 0.015341420901449965, 'value_scale': 0.24801790714263916} step=234
2022-02-06 19:40.09 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_234.pt


Epoch 7/50: 100%|██████████| 39/39 [00:00<00:00, 149.07it/s, loss=0.222]


2022-02-06 19:40.10 [info     ] DiscreteCQL_20220206194004: epoch=7 step=273 epoch=7 metrics={'time_sample_batch': 0.00020647660279885316, 'time_algorithm_update': 0.006271857481736403, 'loss': 0.21461379528045654, 'time_step': 0.006682683260012896, 'environment': 2.195367132920057, 'advantage': 0.0, 'td_error': 0.009554851204484294, 'value_scale': 0.1953175961971283} step=273
2022-02-06 19:40.10 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_273.pt


Epoch 8/50: 100%|██████████| 39/39 [00:00<00:00, 147.44it/s, loss=0.159]


2022-02-06 19:40.11 [info     ] DiscreteCQL_20220206194004: epoch=8 step=312 epoch=8 metrics={'time_sample_batch': 0.00015401228880270934, 'time_algorithm_update': 0.0062734713921180135, 'loss': 0.15394680660504562, 'time_step': 0.006656072078607021, 'environment': 2.924986143642358, 'advantage': 0.0, 'td_error': 0.005976420655883885, 'value_scale': 0.15382690727710724} step=312
2022-02-06 19:40.11 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_312.pt


Epoch 9/50: 100%|██████████| 39/39 [00:00<00:00, 148.29it/s, loss=0.114]


2022-02-06 19:40.12 [info     ] DiscreteCQL_20220206194004: epoch=9 step=351 epoch=9 metrics={'time_sample_batch': 0.00020203223595252403, 'time_algorithm_update': 0.006312437546558869, 'loss': 0.11004230208121814, 'time_step': 0.006641773077157827, 'environment': 2.3768977200479093, 'advantage': 0.0, 'td_error': 0.0038159651286093776, 'value_scale': 0.1220160573720932} step=351
2022-02-06 19:40.12 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_351.pt


Epoch 10/50: 100%|██████████| 39/39 [00:00<00:00, 134.94it/s, loss=0.0821]


2022-02-06 19:40.12 [info     ] DiscreteCQL_20220206194004: epoch=10 step=390 epoch=10 metrics={'time_sample_batch': 0.0002566056373791817, 'time_algorithm_update': 0.006900732333843525, 'loss': 0.07961428795869534, 'time_step': 0.00733488645309057, 'environment': 2.307955817783115, 'advantage': 0.0, 'td_error': 0.002714640749445607, 'value_scale': 0.10197316482663155} step=390
2022-02-06 19:40.12 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_390.pt


Epoch 11/50: 100%|██████████| 39/39 [00:00<00:00, 149.41it/s, loss=0.0607]


2022-02-06 19:40.13 [info     ] DiscreteCQL_20220206194004: epoch=11 step=429 epoch=11 metrics={'time_sample_batch': 0.0002545760228083684, 'time_algorithm_update': 0.006258444908337715, 'loss': 0.05903022908247434, 'time_step': 0.006641803643642328, 'environment': 2.686090607306533, 'advantage': 0.0, 'td_error': 0.0018777867237105283, 'value_scale': 0.0835069939494133} step=429
2022-02-06 19:40.13 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_429.pt


Epoch 12/50: 100%|██████████| 39/39 [00:00<00:00, 143.36it/s, loss=0.0461]


2022-02-06 19:40.14 [info     ] DiscreteCQL_20220206194004: epoch=12 step=468 epoch=12 metrics={'time_sample_batch': 2.5688073573968348e-05, 'time_algorithm_update': 0.006731479595869015, 'loss': 0.044974373414730415, 'time_step': 0.006924170714158278, 'environment': 2.2645696132681183, 'advantage': 0.0, 'td_error': 0.00154028925852856, 'value_scale': 0.07474280148744583} step=468
2022-02-06 19:40.14 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_468.pt


Epoch 13/50: 100%|██████████| 39/39 [00:00<00:00, 146.59it/s, loss=0.036] 


2022-02-06 19:40.14 [info     ] DiscreteCQL_20220206194004: epoch=13 step=507 epoch=13 metrics={'time_sample_batch': 0.00020483823922964244, 'time_algorithm_update': 0.00626064569522173, 'loss': 0.035162294044708595, 'time_step': 0.006720634607168345, 'environment': 2.497848540124029, 'advantage': 0.0, 'td_error': 0.001315710278923632, 'value_scale': 0.0682704821228981} step=507
2022-02-06 19:40.14 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_507.pt


Epoch 14/50: 100%|██████████| 39/39 [00:00<00:00, 154.13it/s, loss=0.0287]


2022-02-06 19:40.15 [info     ] DiscreteCQL_20220206194004: epoch=14 step=546 epoch=14 metrics={'time_sample_batch': 5.126610780373598e-05, 'time_algorithm_update': 0.006181692465757713, 'loss': 0.028137449461680192, 'time_step': 0.006437179369804187, 'environment': 2.470083856570488, 'advantage': 0.0, 'td_error': 0.001157628281511336, 'value_scale': 0.0633053183555603} step=546
2022-02-06 19:40.15 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_546.pt


Epoch 15/50: 100%|██████████| 39/39 [00:00<00:00, 147.17it/s, loss=0.0234]


2022-02-06 19:40.16 [info     ] DiscreteCQL_20220206194004: epoch=15 step=585 epoch=15 metrics={'time_sample_batch': 0.000204923825386243, 'time_algorithm_update': 0.006335558035434821, 'loss': 0.022972167159120243, 'time_step': 0.006669166760566907, 'environment': 2.5966075379490556, 'advantage': 0.0, 'td_error': 0.00093350196414832, 'value_scale': 0.05547952279448509} step=585
2022-02-06 19:40.16 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_585.pt


Epoch 16/50: 100%|██████████| 39/39 [00:00<00:00, 147.73it/s, loss=0.0194]


2022-02-06 19:40.16 [info     ] DiscreteCQL_20220206194004: epoch=16 step=624 epoch=16 metrics={'time_sample_batch': 0.00017725504361666166, 'time_algorithm_update': 0.006284004602676783, 'loss': 0.019085268275095865, 'time_step': 0.006667033219948793, 'environment': 2.6417880055649485, 'advantage': 0.0, 'td_error': 0.0009064103870812801, 'value_scale': 0.05445431172847748} step=624
2022-02-06 19:40.16 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_624.pt


Epoch 17/50: 100%|██████████| 39/39 [00:00<00:00, 96.77it/s, loss=0.0163] 


2022-02-06 19:40.17 [info     ] DiscreteCQL_20220206194004: epoch=17 step=663 epoch=17 metrics={'time_sample_batch': 0.00012919230338854669, 'time_algorithm_update': 0.009949182852720603, 'loss': 0.016091536706647813, 'time_step': 0.010231115879156651, 'environment': 2.8430762838941863, 'advantage': 0.0, 'td_error': 0.0007048787393024725, 'value_scale': 0.046083781868219376} step=663
2022-02-06 19:40.17 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_663.pt


Epoch 18/50: 100%|██████████| 39/39 [00:00<00:00, 137.81it/s, loss=0.014] 


2022-02-06 19:40.18 [info     ] DiscreteCQL_20220206194004: epoch=18 step=702 epoch=18 metrics={'time_sample_batch': 0.00015364549098870694, 'time_algorithm_update': 0.006896648651514298, 'loss': 0.013748442419828514, 'time_step': 0.007178496091793745, 'environment': 2.469620120622723, 'advantage': 0.0, 'td_error': 0.0008927710273951561, 'value_scale': 0.053930506110191345} step=702
2022-02-06 19:40.18 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_702.pt


Epoch 19/50: 100%|██████████| 39/39 [00:00<00:00, 126.63it/s, loss=0.0121]


2022-02-06 19:40.19 [info     ] DiscreteCQL_20220206194004: epoch=19 step=741 epoch=19 metrics={'time_sample_batch': 0.00012855040721404247, 'time_algorithm_update': 0.007538489806346404, 'loss': 0.011882613604076398, 'time_step': 0.007846349324935522, 'environment': 2.4437777628147255, 'advantage': 0.0, 'td_error': 0.0008623630340380828, 'value_scale': 0.052743203938007355} step=741
2022-02-06 19:40.19 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_741.pt


Epoch 20/50: 100%|██████████| 39/39 [00:00<00:00, 121.50it/s, loss=0.0105]


2022-02-06 19:40.20 [info     ] DiscreteCQL_20220206194004: epoch=20 step=780 epoch=20 metrics={'time_sample_batch': 7.691750159630409e-05, 'time_algorithm_update': 0.007898208422538562, 'loss': 0.010360492035173453, 'time_step': 0.008102637070875902, 'environment': 2.5435142759727043, 'advantage': 0.0, 'td_error': 0.0006941256423225184, 'value_scale': 0.04559186100959778} step=780
2022-02-06 19:40.20 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_780.pt


Epoch 21/50: 100%|██████████| 39/39 [00:00<00:00, 131.53it/s, loss=0.00922]


2022-02-06 19:40.20 [info     ] DiscreteCQL_20220206194004: epoch=21 step=819 epoch=21 metrics={'time_sample_batch': 0.00010304573254707533, 'time_algorithm_update': 0.007269516969338441, 'loss': 0.009114403922397357, 'time_step': 0.007500214454455254, 'environment': 2.2942000161276175, 'advantage': 0.0, 'td_error': 0.0006031066554967879, 'value_scale': 0.04118075221776962} step=819
2022-02-06 19:40.20 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_819.pt


Epoch 22/50: 100%|██████████| 39/39 [00:00<00:00, 136.59it/s, loss=0.00817]


2022-02-06 19:40.21 [info     ] DiscreteCQL_20220206194004: epoch=22 step=858 epoch=22 metrics={'time_sample_batch': 0.0001532114469088041, 'time_algorithm_update': 0.006938799833640074, 'loss': 0.008083213514720019, 'time_step': 0.0072444402254544776, 'environment': 2.586274297996409, 'advantage': 0.0, 'td_error': 0.0006493119212969134, 'value_scale': 0.043478887528181076} step=858
2022-02-06 19:40.21 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_858.pt


Epoch 23/50: 100%|██████████| 39/39 [00:00<00:00, 147.17it/s, loss=0.0073] 


2022-02-06 19:40.22 [info     ] DiscreteCQL_20220206194004: epoch=23 step=897 epoch=23 metrics={'time_sample_batch': 0.0001534743186755058, 'time_algorithm_update': 0.0064108860798371145, 'loss': 0.007219530439052062, 'time_step': 0.00669247676164676, 'environment': 2.7504293784654186, 'advantage': 0.0, 'td_error': 0.0006035215461395183, 'value_scale': 0.041201986372470856} step=897
2022-02-06 19:40.22 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_897.pt


Epoch 24/50: 100%|██████████| 39/39 [00:00<00:00, 135.42it/s, loss=0.00656]


2022-02-06 19:40.22 [info     ] DiscreteCQL_20220206194004: epoch=24 step=936 epoch=24 metrics={'time_sample_batch': 0.00012891720502804487, 'time_algorithm_update': 0.007026488964374249, 'loss': 0.0064860667364719585, 'time_step': 0.007308464783888597, 'environment': 2.38348064006409, 'advantage': 0.0, 'td_error': 0.0006093078621169923, 'value_scale': 0.041496988385915756} step=936
2022-02-06 19:40.22 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_936.pt


Epoch 25/50: 100%|██████████| 39/39 [00:00<00:00, 140.79it/s, loss=0.00593]


2022-02-06 19:40.23 [info     ] DiscreteCQL_20220206194004: epoch=25 step=975 epoch=25 metrics={'time_sample_batch': 0.00012787183125813803, 'time_algorithm_update': 0.006748309502234826, 'loss': 0.0058627798795126956, 'time_step': 0.007001393880599584, 'environment': 2.763009973865427, 'advantage': 0.0, 'td_error': 0.00063961338259233, 'value_scale': 0.043007198721170425} step=975
2022-02-06 19:40.23 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_975.pt


Epoch 26/50: 100%|██████████| 39/39 [00:00<00:00, 141.83it/s, loss=0.00539]


2022-02-06 19:40.24 [info     ] DiscreteCQL_20220206194004: epoch=26 step=1014 epoch=26 metrics={'time_sample_batch': 0.00010167024074456631, 'time_algorithm_update': 0.00669555786328438, 'loss': 0.005325780309832249, 'time_step': 0.0069488623203375404, 'environment': 2.8060825247402734, 'advantage': 0.0, 'td_error': 0.0006420300233322962, 'value_scale': 0.043125253170728683} step=1014
2022-02-06 19:40.24 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_1014.pt


Epoch 27/50: 100%|██████████| 39/39 [00:00<00:00, 119.63it/s, loss=0.00491]


2022-02-06 19:40.25 [info     ] DiscreteCQL_20220206194004: epoch=27 step=1053 epoch=27 metrics={'time_sample_batch': 0.0001795353033603766, 'time_algorithm_update': 0.007897468713613657, 'loss': 0.004854988187360458, 'time_step': 0.00823066173455654, 'environment': 2.4259413216893604, 'advantage': 0.0, 'td_error': 0.0006214059275180261, 'value_scale': 0.04210672900080681} step=1053
2022-02-06 19:40.25 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_1053.pt


Epoch 28/50: 100%|██████████| 39/39 [00:00<00:00, 134.48it/s, loss=0.00449]


2022-02-06 19:40.25 [info     ] DiscreteCQL_20220206194004: epoch=28 step=1092 epoch=28 metrics={'time_sample_batch': 0.00020502163813664363, 'time_algorithm_update': 0.006976066491542718, 'loss': 0.004438498917107399, 'time_step': 0.007282837843283629, 'environment': 2.567787741473684, 'advantage': 0.0, 'td_error': 0.0006013285848069927, 'value_scale': 0.04108962044119835} step=1092
2022-02-06 19:40.25 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_1092.pt


Epoch 29/50: 100%|██████████| 39/39 [00:00<00:00, 149.42it/s, loss=0.00399]


2022-02-06 19:40.26 [info     ] DiscreteCQL_20220206194004: epoch=29 step=1131 epoch=29 metrics={'time_sample_batch': 0.00010289901342147436, 'time_algorithm_update': 0.006309221952389448, 'loss': 0.003952387475575774, 'time_step': 0.006589938432742388, 'environment': 2.796974155185119, 'advantage': 0.0, 'td_error': 0.00044096533428295004, 'value_scale': 0.031737037003040314} step=1131
2022-02-06 19:40.26 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_1131.pt


Epoch 30/50: 100%|██████████| 39/39 [00:00<00:00, 140.27it/s, loss=0.00355]


2022-02-06 19:40.27 [info     ] DiscreteCQL_20220206194004: epoch=30 step=1170 epoch=30 metrics={'time_sample_batch': 0.00023015951498960837, 'time_algorithm_update': 0.006593581957694812, 'loss': 0.0035015041223512245, 'time_step': 0.006975534634712415, 'environment': 2.5365836932807793, 'advantage': 0.0, 'td_error': 0.0006149424413415261, 'value_scale': 0.04178209975361824} step=1170
2022-02-06 19:40.27 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_1170.pt


Epoch 31/50: 100%|██████████| 39/39 [00:00<00:00, 136.36it/s, loss=0.00315]


2022-02-06 19:40.27 [info     ] DiscreteCQL_20220206194004: epoch=31 step=1209 epoch=31 metrics={'time_sample_batch': 0.00020450200790014022, 'time_algorithm_update': 0.006926634372808995, 'loss': 0.0031062213477129354, 'time_step': 0.0072567768585987584, 'environment': 2.6275629348655825, 'advantage': 0.0, 'td_error': 0.0005468780476967083, 'value_scale': 0.03818707540631294} step=1209
2022-02-06 19:40.27 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_1209.pt


Epoch 32/50: 100%|██████████| 39/39 [00:00<00:00, 138.79it/s, loss=0.00281]


2022-02-06 19:40.28 [info     ] DiscreteCQL_20220206194004: epoch=32 step=1248 epoch=32 metrics={'time_sample_batch': 0.00020349942720853366, 'time_algorithm_update': 0.006694145691700471, 'loss': 0.0027763703002188452, 'time_step': 0.0071024283384665465, 'environment': 2.6989150723786453, 'advantage': 0.0, 'td_error': 0.0005460344372636428, 'value_scale': 0.038140252232551575} step=1248
2022-02-06 19:40.28 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_1248.pt


Epoch 33/50: 100%|██████████| 39/39 [00:00<00:00, 147.73it/s, loss=0.00252]


2022-02-06 19:40.29 [info     ] DiscreteCQL_20220206194004: epoch=33 step=1287 epoch=33 metrics={'time_sample_batch': 0.0001780558855105669, 'time_algorithm_update': 0.006361038256914188, 'loss': 0.002501872571137471, 'time_step': 0.0066664830232277894, 'environment': 2.7295705307806357, 'advantage': 0.0, 'td_error': 0.0004591455896800767, 'value_scale': 0.0329403430223465} step=1287
2022-02-06 19:40.29 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_1287.pt


Epoch 34/50: 100%|██████████| 39/39 [00:00<00:00, 136.36it/s, loss=0.00229]


2022-02-06 19:40.30 [info     ] DiscreteCQL_20220206194004: epoch=34 step=1326 epoch=34 metrics={'time_sample_batch': 0.00023113764249361478, 'time_algorithm_update': 0.006872293276664538, 'loss': 0.0022695099409574117, 'time_step': 0.0072315350556984926, 'environment': 2.696746188385095, 'advantage': 0.0, 'td_error': 0.00048388032563195793, 'value_scale': 0.03450576215982437} step=1326
2022-02-06 19:40.30 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_1326.pt


Epoch 35/50: 100%|██████████| 39/39 [00:00<00:00, 134.95it/s, loss=0.00209]


2022-02-06 19:40.30 [info     ] DiscreteCQL_20220206194004: epoch=35 step=1365 epoch=35 metrics={'time_sample_batch': 0.00017801920572916666, 'time_algorithm_update': 0.006949730408497346, 'loss': 0.0020743084008781575, 'time_step': 0.007281609070606721, 'environment': 2.5958930891404974, 'advantage': 0.0, 'td_error': 0.0005404971652929191, 'value_scale': 0.037831440567970276} step=1365
2022-02-06 19:40.30 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_1365.pt


Epoch 36/50: 100%|██████████| 39/39 [00:00<00:00, 142.85it/s, loss=0.00192]


2022-02-06 19:40.31 [info     ] DiscreteCQL_20220206194004: epoch=36 step=1404 epoch=36 metrics={'time_sample_batch': 7.691138829940405e-05, 'time_algorithm_update': 0.006693699421026768, 'loss': 0.0019028767680701537, 'time_step': 0.006897987463535407, 'environment': 2.4522887203699977, 'advantage': 0.0, 'td_error': 0.0005474956011504162, 'value_scale': 0.03822127357125282} step=1404
2022-02-06 19:40.31 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_1404.pt


Epoch 37/50: 100%|██████████| 39/39 [00:00<00:00, 130.78it/s, loss=0.00177]


2022-02-06 19:40.32 [info     ] DiscreteCQL_20220206194004: epoch=37 step=1443 epoch=37 metrics={'time_sample_batch': 0.0001017008072290665, 'time_algorithm_update': 0.007289904814500075, 'loss': 0.0017541794745156015, 'time_step': 0.007570340083195613, 'environment': 2.2392030255268027, 'advantage': 0.0, 'td_error': 0.0006228835370762908, 'value_scale': 0.04218055307865143} step=1443
2022-02-06 19:40.32 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_1443.pt


Epoch 38/50: 100%|██████████| 39/39 [00:00<00:00, 138.79it/s, loss=0.00164]


2022-02-06 19:40.32 [info     ] DiscreteCQL_20220206194004: epoch=38 step=1482 epoch=38 metrics={'time_sample_batch': 0.00015414678133451022, 'time_algorithm_update': 0.006821754651191907, 'loss': 0.0016220916623775011, 'time_step': 0.0071033086532201525, 'environment': 2.7008813896155073, 'advantage': 0.0, 'td_error': 0.0006814820085168094, 'value_scale': 0.04500627517700195} step=1482
2022-02-06 19:40.32 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_1482.pt


Epoch 39/50: 100%|██████████| 39/39 [00:00<00:00, 148.29it/s, loss=0.00152]


2022-02-06 19:40.33 [info     ] DiscreteCQL_20220206194004: epoch=39 step=1521 epoch=39 metrics={'time_sample_batch': 0.0002051255641839443, 'time_algorithm_update': 0.006208156928037986, 'loss': 0.0015073463452072479, 'time_step': 0.006642525012676532, 'environment': 2.8681361667489047, 'advantage': 0.0, 'td_error': 0.0004994370533637493, 'value_scale': 0.035452671349048615} step=1521
2022-02-06 19:40.33 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_1521.pt


Epoch 40/50: 100%|██████████| 39/39 [00:00<00:00, 145.52it/s, loss=0.00141]


2022-02-06 19:40.34 [info     ] DiscreteCQL_20220206194004: epoch=40 step=1560 epoch=40 metrics={'time_sample_batch': 0.000281829100388747, 'time_algorithm_update': 0.006385032947246845, 'loss': 0.0013976810076154578, 'time_step': 0.0067951067900046324, 'environment': 2.5265675600727158, 'advantage': 0.0, 'td_error': 0.0005285660403266945, 'value_scale': 0.03715686500072479} step=1560
2022-02-06 19:40.34 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_1560.pt


Epoch 41/50: 100%|██████████| 39/39 [00:00<00:00, 144.98it/s, loss=0.00133]


2022-02-06 19:40.35 [info     ] DiscreteCQL_20220206194004: epoch=41 step=1599 epoch=41 metrics={'time_sample_batch': 7.685636862730369e-05, 'time_algorithm_update': 0.006565693097236829, 'loss': 0.0013204885778041221, 'time_step': 0.0067964700552133415, 'environment': 2.43074421370882, 'advantage': 0.0, 'td_error': 0.0005101887747649414, 'value_scale': 0.036091603338718414} step=1599
2022-02-06 19:40.35 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_1599.pt


Epoch 42/50: 100%|██████████| 39/39 [00:00<00:00, 123.03it/s, loss=0.00124]


2022-02-06 19:40.35 [info     ] DiscreteCQL_20220206194004: epoch=42 step=1638 epoch=42 metrics={'time_sample_batch': 0.00023046517983461038, 'time_algorithm_update': 0.007564471318171575, 'loss': 0.0012274911143602086, 'time_step': 0.007948912107027493, 'environment': 2.6384585217215686, 'advantage': 0.0, 'td_error': 0.000487687810615256, 'value_scale': 0.034740056842565536} step=1638
2022-02-06 19:40.35 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_1638.pt


Epoch 43/50: 100%|██████████| 39/39 [00:00<00:00, 128.30it/s, loss=0.00116]


2022-02-06 19:40.36 [info     ] DiscreteCQL_20220206194004: epoch=43 step=1677 epoch=43 metrics={'time_sample_batch': 0.000254209224994366, 'time_algorithm_update': 0.0073117415110270185, 'loss': 0.0011553598048452002, 'time_step': 0.007692581568008814, 'environment': 2.613450718050449, 'advantage': 0.0, 'td_error': 0.0006075462685615207, 'value_scale': 0.041407402604818344} step=1677
2022-02-06 19:40.36 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_1677.pt


Epoch 44/50: 100%|██████████| 39/39 [00:00<00:00, 138.79it/s, loss=0.0011] 


2022-02-06 19:40.37 [info     ] DiscreteCQL_20220206194004: epoch=44 step=1716 epoch=44 metrics={'time_sample_batch': 0.00010216541779346955, 'time_algorithm_update': 0.006900469462076823, 'loss': 0.0010971821283396238, 'time_step': 0.007104225647755158, 'environment': 2.4479334473333227, 'advantage': 0.0, 'td_error': 0.000515392363137579, 'value_scale': 0.036396630108356476} step=1716
2022-02-06 19:40.37 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_1716.pt


Epoch 45/50: 100%|██████████| 39/39 [00:00<00:00, 134.95it/s, loss=0.00104]


2022-02-06 19:40.38 [info     ] DiscreteCQL_20220206194004: epoch=45 step=1755 epoch=45 metrics={'time_sample_batch': 0.00017847158969976963, 'time_algorithm_update': 0.006898201428926908, 'loss': 0.0010389809628041126, 'time_step': 0.007281798582810622, 'environment': 2.248849861910773, 'advantage': 0.0, 'td_error': 0.0005889235640132995, 'value_scale': 0.040447670966386795} step=1755
2022-02-06 19:40.38 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_1755.pt


Epoch 46/50: 100%|██████████| 39/39 [00:00<00:00, 135.63it/s, loss=0.000993]


2022-02-06 19:40.38 [info     ] DiscreteCQL_20220206194004: epoch=46 step=1794 epoch=46 metrics={'time_sample_batch': 0.00020518058385604466, 'time_algorithm_update': 0.00686039680089706, 'loss': 0.000989497610881256, 'time_step': 0.007270427850576548, 'environment': 2.7606239998323927, 'advantage': 0.0, 'td_error': 0.0005875444603091751, 'value_scale': 0.04037565365433693} step=1794
2022-02-06 19:40.38 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_1794.pt


Epoch 47/50: 100%|██████████| 39/39 [00:00<00:00, 143.38it/s, loss=0.000944]


2022-02-06 19:40.39 [info     ] DiscreteCQL_20220206194004: epoch=47 step=1833 epoch=47 metrics={'time_sample_batch': 5.321624951484876e-05, 'time_algorithm_update': 0.006692348382411859, 'loss': 0.0009366603499541107, 'time_step': 0.006872800680307241, 'environment': 2.354124210633907, 'advantage': 0.0, 'td_error': 0.0005646622798138878, 'value_scale': 0.039160072803497314} step=1833
2022-02-06 19:40.39 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_1833.pt


Epoch 48/50: 100%|██████████| 39/39 [00:00<00:00, 131.73it/s, loss=0.0009]  


2022-02-06 19:40.40 [info     ] DiscreteCQL_20220206194004: epoch=48 step=1872 epoch=48 metrics={'time_sample_batch': 0.00023094813028971353, 'time_algorithm_update': 0.00700221917568109, 'loss': 0.0008968108106786624, 'time_step': 0.007489638450818184, 'environment': 2.522024862091464, 'advantage': 0.0, 'td_error': 0.0007024166164200096, 'value_scale': 0.04597160220146179} step=1872
2022-02-06 19:40.40 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_1872.pt


Epoch 49/50: 100%|██████████| 39/39 [00:00<00:00, 134.27it/s, loss=0.00087] 


2022-02-06 19:40.40 [info     ] DiscreteCQL_20220206194004: epoch=49 step=1911 epoch=49 metrics={'time_sample_batch': 0.0001763441623785557, 'time_algorithm_update': 0.007068933584751227, 'loss': 0.0008589333633725077, 'time_step': 0.007398140736115284, 'environment': 2.735316768516915, 'advantage': 0.0, 'td_error': 0.0005257380115324395, 'value_scale': 0.03699507191777229} step=1911
2022-02-06 19:40.40 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_1911.pt


Epoch 50/50: 100%|██████████| 39/39 [00:00<00:00, 139.78it/s, loss=0.00084] 


2022-02-06 19:40.41 [info     ] DiscreteCQL_20220206194004: epoch=50 step=1950 epoch=50 metrics={'time_sample_batch': 0.00015287521557930188, 'time_algorithm_update': 0.006796971345559144, 'loss': 0.0008299540779075753, 'time_step': 0.007103608204768254, 'environment': 2.703058760858339, 'advantage': 0.0, 'td_error': 0.0004425350983989773, 'value_scale': 0.03184283897280693} step=1950
2022-02-06 19:40.41 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20220206194004\model_1950.pt
