In [2]:
import d3rlpy
import pickle

In [3]:
def load_dataset(f_path):
    """Read and return the object pickled in the file at ``f_path``.

    The file is opened in binary mode and is closed again before this
    function returns, whether or not unpickling succeeds.

    Note: ``pickle.load`` can execute arbitrary code embedded in the file,
    so only point this at files you created or otherwise trust.
    """
    with open(f_path, 'rb') as handle:
        return pickle.load(handle)

def get_CQL_model(device='cuda:0'):
    """Build a discrete CQL algorithm object with a small pixel encoder.

    Args:
        device: Forwarded unchanged to ``DiscreteCQLConfig.create``. The
            default ``'cuda:0'`` is the value that used to be hard-coded,
            so existing zero-argument calls behave exactly as before. Pass
            another device (e.g. ``'cpu:0'``) to build the model elsewhere.

    Returns:
        Whatever ``DiscreteCQLConfig(...).create(device=device)`` returns.
    """
    # Four stacked filter specs. Each triple is presumably
    # [out_channels, kernel_size, stride] -- confirm against the d3rlpy docs.
    pixel_encoder_factory = d3rlpy.models.PixelEncoderFactory(
        filters=[[3, 2, 1], [16, 2, 1], [32, 2, 1], [64, 2, 1]],
    )
    config = d3rlpy.algos.DiscreteCQLConfig(encoder_factory=pixel_encoder_factory)
    return config.create(device=device)

def get_BCQ_model(device='cuda:0'):
    """Build a discrete BCQ algorithm object with a small pixel encoder.

    Args:
        device: Forwarded unchanged to ``DiscreteBCQConfig.create``. The
            default ``'cuda:0'`` is the value that used to be hard-coded,
            so existing zero-argument calls behave exactly as before. Pass
            another device (e.g. ``'cpu:0'``) to build the model elsewhere.

    Returns:
        Whatever ``DiscreteBCQConfig(...).create(device=device)`` returns.
    """
    # Four stacked filter specs. Each triple is presumably
    # [out_channels, kernel_size, stride] -- confirm against the d3rlpy docs.
    pixel_encoder_factory = d3rlpy.models.PixelEncoderFactory(
        filters=[[3, 2, 1], [16, 2, 1], [32, 2, 1], [64, 2, 1]],
    )
    config = d3rlpy.algos.DiscreteBCQConfig(encoder_factory=pixel_encoder_factory)
    return config.create(device=device)

def train_and_save(dataset, model, save_name,
                   n_steps=30000, n_steps_per_epoch=1000, save_interval=100):
    """Fit ``model`` on ``dataset`` and write the result to ``save_name``.

    Prints the number of episodes in the dataset first, then calls
    ``model.fit`` and finally ``model.save``.

    Args:
        dataset: Object exposing an ``episodes`` sequence; passed as the
            first positional argument to ``model.fit``.
        model: Object exposing ``fit(dataset, n_steps=..., ...)`` and
            ``save(path)``.
        save_name: Path handed to ``model.save`` once fitting is done.
        n_steps: Forwarded to ``model.fit``. Default matches the previously
            hard-coded value.
        n_steps_per_epoch: Forwarded to ``model.fit``. Default matches the
            previously hard-coded value.
        save_interval: Forwarded to ``model.fit``. Default matches the
            previously hard-coded value.
            NOTE(review): if this is counted in epochs, the default of 100
            exceeds the 30 epochs of a default run, so no intermediate
            checkpoints would be written -- confirm against the d3rlpy docs.

    Returns:
        ``True`` once the model has been saved.
    """
    print(len(dataset.episodes))
    model.fit(
        dataset,
        n_steps=n_steps,
        n_steps_per_epoch=n_steps_per_epoch,
        save_interval=save_interval,
    )
    model.save(save_name)
    return True

### 5x5 World

In [4]:
# Location and naming pieces of the pickled 5x5 gridworld datasets.
base_path = '/vol/bitbucket/phl23'
dataset_prefix = 'Gridworld5x5RandomPPO'
dataset_postfix = 'Episode_dataset.pkl'

def get_5x5_dataset_path(episodes, base_path=base_path,
                         dataset_prefix=dataset_prefix,
                         dataset_postfix=dataset_postfix):
    """Return the path of the 5x5 dataset file for ``episodes`` episodes.

    The result has the form
    ``{base_path}/{dataset_prefix}_{episodes}{dataset_postfix}``.

    Args:
        episodes: Episode count embedded in the file name.
        base_path: Directory part of the path.
        dataset_prefix: File-name part before the episode count.
        dataset_postfix: File-name part after the episode count.

    The three keyword defaults are taken from the module-level values above
    when this ``def`` is executed, so rebinding those globals later does not
    change what a plain ``get_5x5_dataset_path(n)`` call returns.
    """
    return f'{base_path}/{dataset_prefix}_{episodes}{dataset_postfix}'
    

In [5]:
# Train one fresh CQL model per dataset size and save each under its own
# name. A new model is built on every iteration so that no weights carry
# over from the previous run. `dataset` and `model` are deliberately left
# as ordinary globals, so after the loop they refer to the last
# (400-episode) run.
for n_episodes in (50, 100, 200, 400):
    dataset = load_dataset(get_5x5_dataset_path(n_episodes))
    model = get_CQL_model()
    train_and_save(dataset, model, f'CQL_Gridworld5x5_{n_episodes}Episode.d3')

50
[2m2024-08-01 16:08.42[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('uint8')], shape=[(3, 7, 7)]), action_signature=Signature(dtype=[dtype('int64')], shape=[(1,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.DISCRETE: 2>, action_size=3)[0m
[2m2024-08-01 16:08.42[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/DiscreteCQL_20240801160842[0m
[2m2024-08-01 16:08.42[0m [[32m[1mdebug    [0m] [1mBuilding models...            [0m
[2m2024-08-01 16:08.46[0m [[32m[1mdebug    [0m] [1mModels have been built.       [0m
[2m2024-08-01 16:08.46[0m [[32m[1minfo     [0m] [1mParameters                    [0m [36mparams[0m=[35m{'observation_shape': [3, 7, 7], 'action_size': 3, 'config': {'type': 'discrete_cql', 'params': {'batch_size': 32, 'gamma': 0.99, 'observation_scaler': {'type': 'none', 'params

Epoch 1/30: 100%|██████████| 1000/1000 [00:10<00:00, 98.98it/s, loss=0.735, td_loss=0.0617, conservative_loss=0.673]

[2m2024-08-01 16:08.57[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801160842: epoch=1 step=1000[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008120920658111572, 'time_algorithm_update': 0.008981704235076904, 'loss': 0.7335000841021537, 'td_loss': 0.06172673691343516, 'conservative_loss': 0.6717733479738236, 'time_step': 0.009979727029800415}[0m [36mstep[0m=[35m1000[0m



Epoch 2/30: 100%|██████████| 1000/1000 [00:08<00:00, 116.14it/s, loss=0.543, td_loss=0.0614, conservative_loss=0.482]

[2m2024-08-01 16:09.05[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801160842: epoch=2 step=2000[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008072149753570556, 'time_algorithm_update': 0.007512743234634399, 'loss': 0.5431122281551362, 'td_loss': 0.06139381177467294, 'conservative_loss': 0.4817184167653322, 'time_step': 0.008487969636917114}[0m [36mstep[0m=[35m2000[0m



Epoch 3/30: 100%|██████████| 1000/1000 [00:08<00:00, 112.79it/s, loss=0.509, td_loss=0.059, conservative_loss=0.45] 

[2m2024-08-01 16:09.14[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801160842: epoch=3 step=3000[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008261380195617676, 'time_algorithm_update': 0.007748709917068481, 'loss': 0.5088910770714283, 'td_loss': 0.059165387098677455, 'conservative_loss': 0.4497256892323494, 'time_step': 0.008756750106811524}[0m [36mstep[0m=[35m3000[0m



Epoch 4/30: 100%|██████████| 1000/1000 [00:08<00:00, 118.06it/s, loss=0.479, td_loss=0.0556, conservative_loss=0.424]


[2m2024-08-01 16:09.23[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801160842: epoch=4 step=4000[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007780220508575439, 'time_algorithm_update': 0.007408185720443726, 'loss': 0.4799359897822142, 'td_loss': 0.055810660704504696, 'conservative_loss': 0.4241253292411566, 'time_step': 0.008361665964126587}[0m [36mstep[0m=[35m4000[0m


Epoch 5/30: 100%|██████████| 1000/1000 [00:08<00:00, 111.64it/s, loss=0.458, td_loss=0.0522, conservative_loss=0.406]

[2m2024-08-01 16:09.32[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801160842: epoch=5 step=5000[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008480587005615235, 'time_algorithm_update': 0.007816765546798707, 'loss': 0.45796638931334016, 'td_loss': 0.052228756818454715, 'conservative_loss': 0.4057376330643892, 'time_step': 0.00884727430343628}[0m [36mstep[0m=[35m5000[0m



Epoch 6/30: 100%|██████████| 1000/1000 [00:08<00:00, 116.22it/s, loss=0.441, td_loss=0.0506, conservative_loss=0.39]


[2m2024-08-01 16:09.40[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801160842: epoch=6 step=6000[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008012790679931641, 'time_algorithm_update': 0.007511407136917114, 'loss': 0.440594688937068, 'td_loss': 0.0505426576253958, 'conservative_loss': 0.3900520304888487, 'time_step': 0.008488827228546143}[0m [36mstep[0m=[35m6000[0m


Epoch 7/30: 100%|██████████| 1000/1000 [00:08<00:00, 111.48it/s, loss=0.423, td_loss=0.0493, conservative_loss=0.373]

[2m2024-08-01 16:09.49[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801160842: epoch=7 step=7000[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008408780097961426, 'time_algorithm_update': 0.007848692178726196, 'loss': 0.4223264917433262, 'td_loss': 0.04929086049553007, 'conservative_loss': 0.37303563170135023, 'time_step': 0.00885977840423584}[0m [36mstep[0m=[35m7000[0m



Epoch 8/30: 100%|██████████| 1000/1000 [00:08<00:00, 111.82it/s, loss=0.416, td_loss=0.0472, conservative_loss=0.369]

[2m2024-08-01 16:09.58[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801160842: epoch=8 step=8000[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008365752696990967, 'time_algorithm_update': 0.007820647954940797, 'loss': 0.4155057374089956, 'td_loss': 0.04711300663626753, 'conservative_loss': 0.3683927311375737, 'time_step': 0.008828788280487061}[0m [36mstep[0m=[35m8000[0m



Epoch 9/30: 100%|██████████| 1000/1000 [00:08<00:00, 116.30it/s, loss=0.404, td_loss=0.0423, conservative_loss=0.362]

[2m2024-08-01 16:10.07[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801160842: epoch=9 step=9000[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000803239107131958, 'time_algorithm_update': 0.007497962474822998, 'loss': 0.40408810952305796, 'td_loss': 0.04218133192136884, 'conservative_loss': 0.361906777895987, 'time_step': 0.008488477230072022}[0m [36mstep[0m=[35m9000[0m



Epoch 10/30: 100%|██████████| 1000/1000 [00:08<00:00, 114.61it/s, loss=0.399, td_loss=0.0381, conservative_loss=0.361]

[2m2024-08-01 16:10.15[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801160842: epoch=10 step=10000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000819429874420166, 'time_algorithm_update': 0.007610758543014526, 'loss': 0.3993577235341072, 'td_loss': 0.03803160726651549, 'conservative_loss': 0.3613261161148548, 'time_step': 0.008617303371429443}[0m [36mstep[0m=[35m10000[0m



Epoch 11/30: 100%|██████████| 1000/1000 [00:08<00:00, 115.60it/s, loss=0.39, td_loss=0.0377, conservative_loss=0.353]

[2m2024-08-01 16:10.24[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801160842: epoch=11 step=11000[0m [36mepoch[0m=[35m11[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008051455020904541, 'time_algorithm_update': 0.0075573911666870115, 'loss': 0.3904301271289587, 'td_loss': 0.037652394461911175, 'conservative_loss': 0.35277773314714433, 'time_step': 0.008538132667541503}[0m [36mstep[0m=[35m11000[0m



Epoch 12/30: 100%|██████████| 1000/1000 [00:08<00:00, 113.54it/s, loss=0.393, td_loss=0.0362, conservative_loss=0.357]

[2m2024-08-01 16:10.33[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801160842: epoch=12 step=12000[0m [36mepoch[0m=[35m12[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008217463493347168, 'time_algorithm_update': 0.007700748920440674, 'loss': 0.39313957293331625, 'td_loss': 0.03627274023066275, 'conservative_loss': 0.35686683240532874, 'time_step': 0.008697825908660889}[0m [36mstep[0m=[35m12000[0m



Epoch 13/30: 100%|██████████| 1000/1000 [00:09<00:00, 110.80it/s, loss=0.398, td_loss=0.0362, conservative_loss=0.361]

[2m2024-08-01 16:10.42[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801160842: epoch=13 step=13000[0m [36mepoch[0m=[35m13[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000833134651184082, 'time_algorithm_update': 0.007851440668106079, 'loss': 0.3976283096075058, 'td_loss': 0.03625002586003393, 'conservative_loss': 0.36137828321754933, 'time_step': 0.008881845712661743}[0m [36mstep[0m=[35m13000[0m



Epoch 14/30: 100%|██████████| 1000/1000 [00:09<00:00, 110.37it/s, loss=0.399, td_loss=0.0359, conservative_loss=0.363]

[2m2024-08-01 16:10.51[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801160842: epoch=14 step=14000[0m [36mepoch[0m=[35m14[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008289861679077148, 'time_algorithm_update': 0.007900196075439453, 'loss': 0.3982713487446308, 'td_loss': 0.035854559226427225, 'conservative_loss': 0.36241678991913795, 'time_step': 0.008923890113830567}[0m [36mstep[0m=[35m14000[0m



Epoch 15/30: 100%|██████████| 1000/1000 [00:08<00:00, 111.31it/s, loss=0.391, td_loss=0.0351, conservative_loss=0.356]

[2m2024-08-01 16:11.00[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801160842: epoch=15 step=15000[0m [36mepoch[0m=[35m15[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008471574783325195, 'time_algorithm_update': 0.00784913969039917, 'loss': 0.39076703345775604, 'td_loss': 0.03498187192506157, 'conservative_loss': 0.35578516133129595, 'time_step': 0.00887244439125061}[0m [36mstep[0m=[35m15000[0m



Epoch 16/30: 100%|██████████| 1000/1000 [00:08<00:00, 116.07it/s, loss=0.39, td_loss=0.0351, conservative_loss=0.355]

[2m2024-08-01 16:11.09[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801160842: epoch=16 step=16000[0m [36mepoch[0m=[35m16[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007880380153656006, 'time_algorithm_update': 0.007560601949691772, 'loss': 0.3894434994086623, 'td_loss': 0.0350755722009344, 'conservative_loss': 0.3543679272681475, 'time_step': 0.008512778759002686}[0m [36mstep[0m=[35m16000[0m



Epoch 17/30: 100%|██████████| 1000/1000 [00:09<00:00, 104.79it/s, loss=0.418, td_loss=0.0556, conservative_loss=0.362]


[2m2024-08-01 16:11.18[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801160842: epoch=17 step=17000[0m [36mepoch[0m=[35m17[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008735074996948242, 'time_algorithm_update': 0.00831567668914795, 'loss': 0.4172701687514782, 'td_loss': 0.055530687542865054, 'conservative_loss': 0.3617394813448191, 'time_step': 0.009386986494064331}[0m [36mstep[0m=[35m17000[0m


Epoch 18/30: 100%|██████████| 1000/1000 [00:09<00:00, 111.08it/s, loss=0.416, td_loss=0.0535, conservative_loss=0.362]

[2m2024-08-01 16:11.27[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801160842: epoch=18 step=18000[0m [36mepoch[0m=[35m18[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008341524600982666, 'time_algorithm_update': 0.007851689338684083, 'loss': 0.41524363231658934, 'td_loss': 0.05327160448441282, 'conservative_loss': 0.3619720280468464, 'time_step': 0.008879626035690308}[0m [36mstep[0m=[35m18000[0m



Epoch 19/30: 100%|██████████| 1000/1000 [00:08<00:00, 117.17it/s, loss=0.41, td_loss=0.0525, conservative_loss=0.358]


[2m2024-08-01 16:11.36[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801160842: epoch=19 step=19000[0m [36mepoch[0m=[35m19[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008052525520324707, 'time_algorithm_update': 0.007453166007995606, 'loss': 0.4104188666343689, 'td_loss': 0.05257697378541343, 'conservative_loss': 0.35784189274907113, 'time_step': 0.008427733182907104}[0m [36mstep[0m=[35m19000[0m


Epoch 20/30: 100%|██████████| 1000/1000 [00:08<00:00, 113.25it/s, loss=0.409, td_loss=0.0528, conservative_loss=0.356]


[2m2024-08-01 16:11.45[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801160842: epoch=20 step=20000[0m [36mepoch[0m=[35m20[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008269395828247071, 'time_algorithm_update': 0.0076934704780578615, 'loss': 0.4088552393913269, 'td_loss': 0.052670413086889314, 'conservative_loss': 0.35618482644855975, 'time_step': 0.008714855194091797}[0m [36mstep[0m=[35m20000[0m


Epoch 21/30: 100%|██████████| 1000/1000 [00:09<00:00, 110.93it/s, loss=0.409, td_loss=0.0508, conservative_loss=0.358]

[2m2024-08-01 16:11.54[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801160842: epoch=21 step=21000[0m [36mepoch[0m=[35m21[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008238131999969482, 'time_algorithm_update': 0.007887948274612427, 'loss': 0.40913877415657046, 'td_loss': 0.050881006096489725, 'conservative_loss': 0.35825776809453963, 'time_step': 0.00889188528060913}[0m [36mstep[0m=[35m21000[0m



Epoch 22/30: 100%|██████████| 1000/1000 [00:08<00:00, 117.54it/s, loss=0.414, td_loss=0.0525, conservative_loss=0.362]

[2m2024-08-01 16:12.02[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801160842: epoch=22 step=22000[0m [36mepoch[0m=[35m22[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007985589504241943, 'time_algorithm_update': 0.007437202215194702, 'loss': 0.4145932999402285, 'td_loss': 0.052524434406077486, 'conservative_loss': 0.36206886561214924, 'time_step': 0.008399044036865234}[0m [36mstep[0m=[35m22000[0m



Epoch 23/30: 100%|██████████| 1000/1000 [00:08<00:00, 112.36it/s, loss=0.413, td_loss=0.0533, conservative_loss=0.359]

[2m2024-08-01 16:12.11[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801160842: epoch=23 step=23000[0m [36mepoch[0m=[35m23[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000833186149597168, 'time_algorithm_update': 0.007788503646850586, 'loss': 0.4127628984749317, 'td_loss': 0.053150538715999576, 'conservative_loss': 0.359612359598279, 'time_step': 0.00878590989112854}[0m [36mstep[0m=[35m23000[0m



Epoch 24/30: 100%|██████████| 1000/1000 [00:08<00:00, 113.64it/s, loss=0.409, td_loss=0.0521, conservative_loss=0.357]

[2m2024-08-01 16:12.20[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801160842: epoch=24 step=24000[0m [36mepoch[0m=[35m24[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008237974643707276, 'time_algorithm_update': 0.007700926542282104, 'loss': 0.40948986080288885, 'td_loss': 0.05222665561502799, 'conservative_loss': 0.35726320546865464, 'time_step': 0.008689982891082764}[0m [36mstep[0m=[35m24000[0m



Epoch 25/30: 100%|██████████| 1000/1000 [00:08<00:00, 111.36it/s, loss=0.421, td_loss=0.0641, conservative_loss=0.357]

[2m2024-08-01 16:12.29[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801160842: epoch=25 step=25000[0m [36mepoch[0m=[35m25[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008364515304565429, 'time_algorithm_update': 0.00786321210861206, 'loss': 0.42127841736376287, 'td_loss': 0.06426821237290278, 'conservative_loss': 0.3570102046728134, 'time_step': 0.008872126817703246}[0m [36mstep[0m=[35m25000[0m



Epoch 26/30: 100%|██████████| 1000/1000 [00:08<00:00, 117.47it/s, loss=0.425, td_loss=0.0629, conservative_loss=0.362]

[2m2024-08-01 16:12.37[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801160842: epoch=26 step=26000[0m [36mepoch[0m=[35m26[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008022925853729249, 'time_algorithm_update': 0.007433655023574829, 'loss': 0.4249970760494471, 'td_loss': 0.06291621697181836, 'conservative_loss': 0.36208085939288137, 'time_step': 0.008413153409957885}[0m [36mstep[0m=[35m26000[0m



Epoch 27/30: 100%|██████████| 1000/1000 [00:09<00:00, 109.55it/s, loss=0.427, td_loss=0.0646, conservative_loss=0.362]

[2m2024-08-01 16:12.47[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801160842: epoch=27 step=27000[0m [36mepoch[0m=[35m27[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008481800556182862, 'time_algorithm_update': 0.007986873388290405, 'loss': 0.4271963617503643, 'td_loss': 0.0647531677945517, 'conservative_loss': 0.3624431942552328, 'time_step': 0.008998536348342896}[0m [36mstep[0m=[35m27000[0m



Epoch 28/30: 100%|██████████| 1000/1000 [00:08<00:00, 112.69it/s, loss=0.419, td_loss=0.0609, conservative_loss=0.359]

[2m2024-08-01 16:12.56[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801160842: epoch=28 step=28000[0m [36mepoch[0m=[35m28[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008306844234466553, 'time_algorithm_update': 0.007747213363647461, 'loss': 0.41943322916328907, 'td_loss': 0.060908093204721805, 'conservative_loss': 0.3585251358896494, 'time_step': 0.00875611400604248}[0m [36mstep[0m=[35m28000[0m



Epoch 29/30: 100%|██████████| 1000/1000 [00:08<00:00, 115.07it/s, loss=0.427, td_loss=0.0635, conservative_loss=0.363]


[2m2024-08-01 16:13.04[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801160842: epoch=29 step=29000[0m [36mepoch[0m=[35m29[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008112354278564454, 'time_algorithm_update': 0.007612429141998291, 'loss': 0.4262726713567972, 'td_loss': 0.0634315141425468, 'conservative_loss': 0.3628411577939987, 'time_step': 0.008584699153900147}[0m [36mstep[0m=[35m29000[0m


Epoch 30/30: 100%|██████████| 1000/1000 [00:08<00:00, 113.93it/s, loss=0.425, td_loss=0.0618, conservative_loss=0.363]

[2m2024-08-01 16:13.13[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801160842: epoch=30 step=30000[0m [36mepoch[0m=[35m30[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008234198093414306, 'time_algorithm_update': 0.007645490646362304, 'loss': 0.42496812728047373, 'td_loss': 0.06181664639338851, 'conservative_loss': 0.3631514802873135, 'time_step': 0.008659136533737182}[0m [36mstep[0m=[35m30000[0m





100
[2m2024-08-01 16:13.13[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('uint8')], shape=[(3, 7, 7)]), action_signature=Signature(dtype=[dtype('int64')], shape=[(1,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.DISCRETE: 2>, action_size=3)[0m
[2m2024-08-01 16:13.13[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/DiscreteCQL_20240801161313[0m
[2m2024-08-01 16:13.13[0m [[32m[1mdebug    [0m] [1mBuilding models...            [0m
[2m2024-08-01 16:13.14[0m [[32m[1mdebug    [0m] [1mModels have been built.       [0m
[2m2024-08-01 16:13.14[0m [[32m[1minfo     [0m] [1mParameters                    [0m [36mparams[0m=[35m{'observation_shape': [3, 7, 7], 'action_size': 3, 'config': {'type': 'discrete_cql', 'params': {'batch_size': 32, 'gamma': 0.99, 'observation_scaler': {'type': 'none', 'param

Epoch 1/30: 100%|██████████| 1000/1000 [00:08<00:00, 116.05it/s, loss=0.787, td_loss=0.0564, conservative_loss=0.731]

[2m2024-08-01 16:13.22[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161313: epoch=1 step=1000[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008222773075103759, 'time_algorithm_update': 0.007510889768600464, 'loss': 0.7859764170646667, 'td_loss': 0.05658776545710862, 'conservative_loss': 0.7293886507153511, 'time_step': 0.00851094627380371}[0m [36mstep[0m=[35m1000[0m



Epoch 2/30: 100%|██████████| 1000/1000 [00:08<00:00, 112.28it/s, loss=0.574, td_loss=0.0742, conservative_loss=0.5] 

[2m2024-08-01 16:13.31[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161313: epoch=2 step=2000[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008416798114776611, 'time_algorithm_update': 0.00776931381225586, 'loss': 0.5734865417331457, 'td_loss': 0.07410704692453146, 'conservative_loss': 0.4993794955611229, 'time_step': 0.008790260553359986}[0m [36mstep[0m=[35m2000[0m



Epoch 3/30: 100%|██████████| 1000/1000 [00:08<00:00, 116.10it/s, loss=0.539, td_loss=0.0744, conservative_loss=0.465]

[2m2024-08-01 16:13.40[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161313: epoch=3 step=3000[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008261919021606446, 'time_algorithm_update': 0.007518392562866211, 'loss': 0.5389843555092811, 'td_loss': 0.07420859980909154, 'conservative_loss': 0.4647757551670074, 'time_step': 0.008513611793518066}[0m [36mstep[0m=[35m3000[0m



Epoch 4/30: 100%|██████████| 1000/1000 [00:08<00:00, 111.89it/s, loss=0.522, td_loss=0.0722, conservative_loss=0.45]

[2m2024-08-01 16:13.49[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161313: epoch=4 step=4000[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008566501140594482, 'time_algorithm_update': 0.007799993753433227, 'loss': 0.5218742965012789, 'td_loss': 0.07226057640369982, 'conservative_loss': 0.4496137202382088, 'time_step': 0.008823264122009277}[0m [36mstep[0m=[35m4000[0m



Epoch 5/30: 100%|██████████| 1000/1000 [00:08<00:00, 117.91it/s, loss=0.499, td_loss=0.0692, conservative_loss=0.43]

[2m2024-08-01 16:13.57[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161313: epoch=5 step=5000[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008101418018341065, 'time_algorithm_update': 0.007396375179290772, 'loss': 0.49861687946319583, 'td_loss': 0.06913377523049712, 'conservative_loss': 0.4294831039905548, 'time_step': 0.00838080096244812}[0m [36mstep[0m=[35m5000[0m



Epoch 6/30: 100%|██████████| 1000/1000 [00:09<00:00, 110.92it/s, loss=0.498, td_loss=0.0686, conservative_loss=0.429]

[2m2024-08-01 16:14.06[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161313: epoch=6 step=6000[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008492829799652099, 'time_algorithm_update': 0.00788964319229126, 'loss': 0.4976193521916866, 'td_loss': 0.06865524670481682, 'conservative_loss': 0.4289641046375036, 'time_step': 0.008910983800888062}[0m [36mstep[0m=[35m6000[0m



Epoch 7/30: 100%|██████████| 1000/1000 [00:08<00:00, 120.45it/s, loss=0.489, td_loss=0.0697, conservative_loss=0.42]

[2m2024-08-01 16:14.15[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161313: epoch=7 step=7000[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008185870647430419, 'time_algorithm_update': 0.007208961248397827, 'loss': 0.48924581053853033, 'td_loss': 0.06966526414919645, 'conservative_loss': 0.4195805461108685, 'time_step': 0.008201246738433837}[0m [36mstep[0m=[35m7000[0m



Epoch 8/30: 100%|██████████| 1000/1000 [00:08<00:00, 112.28it/s, loss=0.494, td_loss=0.0661, conservative_loss=0.428]

[2m2024-08-01 16:14.24[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161313: epoch=8 step=8000[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008748466968536377, 'time_algorithm_update': 0.007737711906433105, 'loss': 0.4934795586168766, 'td_loss': 0.06586944054253399, 'conservative_loss': 0.42761011780798436, 'time_step': 0.008786965131759644}[0m [36mstep[0m=[35m8000[0m



Epoch 9/30: 100%|██████████| 1000/1000 [00:08<00:00, 117.86it/s, loss=0.482, td_loss=0.0605, conservative_loss=0.422]


[2m2024-08-01 16:14.32[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161313: epoch=9 step=9000[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008125321865081787, 'time_algorithm_update': 0.007397076606750488, 'loss': 0.4818134424686432, 'td_loss': 0.06033758063055575, 'conservative_loss': 0.4214758622646332, 'time_step': 0.008387799978256226}[0m [36mstep[0m=[35m9000[0m


Epoch 10/30: 100%|██████████| 1000/1000 [00:08<00:00, 112.81it/s, loss=0.474, td_loss=0.0528, conservative_loss=0.421]

[2m2024-08-01 16:14.41[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161313: epoch=10 step=10000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008446033000946045, 'time_algorithm_update': 0.007732762336730957, 'loss': 0.4734795588552952, 'td_loss': 0.052605467321584, 'conservative_loss': 0.4208740913271904, 'time_step': 0.008751354694366454}[0m [36mstep[0m=[35m10000[0m



Epoch 11/30: 100%|██████████| 1000/1000 [00:08<00:00, 117.28it/s, loss=0.47, td_loss=0.0544, conservative_loss=0.416]

[2m2024-08-01 16:14.49[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161313: epoch=11 step=11000[0m [36mepoch[0m=[35m11[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008247537612915039, 'time_algorithm_update': 0.007425611734390259, 'loss': 0.46969943881034854, 'td_loss': 0.054336756281554696, 'conservative_loss': 0.4153626827001572, 'time_step': 0.008414862632751464}[0m [36mstep[0m=[35m11000[0m



Epoch 12/30: 100%|██████████| 1000/1000 [00:08<00:00, 113.54it/s, loss=0.463, td_loss=0.0525, conservative_loss=0.41]

[2m2024-08-01 16:14.58[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161313: epoch=12 step=12000[0m [36mepoch[0m=[35m12[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00085231614112854, 'time_algorithm_update': 0.0076687936782836915, 'loss': 0.46283485169708727, 'td_loss': 0.052525496942922474, 'conservative_loss': 0.4103093549460173, 'time_step': 0.008696897983551026}[0m [36mstep[0m=[35m12000[0m



Epoch 13/30: 100%|██████████| 1000/1000 [00:08<00:00, 118.32it/s, loss=0.466, td_loss=0.0511, conservative_loss=0.415]


[2m2024-08-01 16:15.07[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161313: epoch=13 step=13000[0m [36mepoch[0m=[35m13[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008229455947875977, 'time_algorithm_update': 0.007341741800308227, 'loss': 0.4655572147667408, 'td_loss': 0.05100529439607635, 'conservative_loss': 0.4145519203096628, 'time_step': 0.0083427894115448}[0m [36mstep[0m=[35m13000[0m


Epoch 14/30: 100%|██████████| 1000/1000 [00:09<00:00, 108.24it/s, loss=0.466, td_loss=0.0521, conservative_loss=0.414]

[2m2024-08-01 16:15.16[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161313: epoch=14 step=14000[0m [36mepoch[0m=[35m14[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008799619674682617, 'time_algorithm_update': 0.008061233043670655, 'loss': 0.4654178833961487, 'td_loss': 0.052067963671172036, 'conservative_loss': 0.4133499190062285, 'time_step': 0.009118657112121582}[0m [36mstep[0m=[35m14000[0m



Epoch 15/30: 100%|██████████| 1000/1000 [00:08<00:00, 118.14it/s, loss=0.461, td_loss=0.0503, conservative_loss=0.41]


[2m2024-08-01 16:15.24[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161313: epoch=15 step=15000[0m [36mepoch[0m=[35m15[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008074519634246826, 'time_algorithm_update': 0.007392745971679687, 'loss': 0.46098416405916215, 'td_loss': 0.05038484899001196, 'conservative_loss': 0.41059931528568266, 'time_step': 0.008364474058151244}[0m [36mstep[0m=[35m15000[0m


Epoch 16/30: 100%|██████████| 1000/1000 [00:09<00:00, 109.85it/s, loss=0.463, td_loss=0.0498, conservative_loss=0.414]

[2m2024-08-01 16:15.34[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161313: epoch=16 step=16000[0m [36mepoch[0m=[35m16[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008653426170349122, 'time_algorithm_update': 0.007938006401062011, 'loss': 0.4627099597156048, 'td_loss': 0.0496344119629357, 'conservative_loss': 0.413075547426939, 'time_step': 0.008988748788833618}[0m [36mstep[0m=[35m16000[0m



Epoch 17/30: 100%|██████████| 1000/1000 [00:08<00:00, 113.10it/s, loss=0.477, td_loss=0.0664, conservative_loss=0.41]


[2m2024-08-01 16:15.42[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161313: epoch=17 step=17000[0m [36mepoch[0m=[35m17[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008399078845977784, 'time_algorithm_update': 0.0076549844741821286, 'loss': 0.47627781696617605, 'td_loss': 0.06631505775172264, 'conservative_loss': 0.4099627585709095, 'time_step': 0.008705174684524536}[0m [36mstep[0m=[35m17000[0m


Epoch 18/30: 100%|██████████| 1000/1000 [00:08<00:00, 112.00it/s, loss=0.474, td_loss=0.0606, conservative_loss=0.413]

[2m2024-08-01 16:15.51[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161313: epoch=18 step=18000[0m [36mepoch[0m=[35m18[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008538076877593994, 'time_algorithm_update': 0.007789126396179199, 'loss': 0.4738672656714916, 'td_loss': 0.060500640640966594, 'conservative_loss': 0.41336662490665915, 'time_step': 0.008817867994308472}[0m [36mstep[0m=[35m18000[0m



Epoch 19/30: 100%|██████████| 1000/1000 [00:09<00:00, 108.80it/s, loss=0.468, td_loss=0.0594, conservative_loss=0.408]

[2m2024-08-01 16:16.01[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161313: epoch=19 step=19000[0m [36mepoch[0m=[35m19[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008565125465393067, 'time_algorithm_update': 0.007996642112731933, 'loss': 0.4682981566488743, 'td_loss': 0.05950929365167394, 'conservative_loss': 0.4087888622432947, 'time_step': 0.009056070566177368}[0m [36mstep[0m=[35m19000[0m



Epoch 20/30: 100%|██████████| 1000/1000 [00:08<00:00, 111.16it/s, loss=0.469, td_loss=0.0611, conservative_loss=0.408]


[2m2024-08-01 16:16.10[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161313: epoch=20 step=20000[0m [36mepoch[0m=[35m20[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008454289436340332, 'time_algorithm_update': 0.007854679822921753, 'loss': 0.4692681999951601, 'td_loss': 0.06110819437168539, 'conservative_loss': 0.40816000628471377, 'time_step': 0.00888251280784607}[0m [36mstep[0m=[35m20000[0m


Epoch 21/30: 100%|██████████| 1000/1000 [00:08<00:00, 117.58it/s, loss=0.467, td_loss=0.06, conservative_loss=0.407] 

[2m2024-08-01 16:16.18[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161313: epoch=21 step=21000[0m [36mepoch[0m=[35m21[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008153259754180908, 'time_algorithm_update': 0.007407707452774048, 'loss': 0.4668835398554802, 'td_loss': 0.05988491057790816, 'conservative_loss': 0.4069986292421818, 'time_step': 0.008399630069732666}[0m [36mstep[0m=[35m21000[0m



Epoch 22/30: 100%|██████████| 1000/1000 [00:09<00:00, 110.88it/s, loss=0.463, td_loss=0.0592, conservative_loss=0.404]

[2m2024-08-01 16:16.27[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161313: epoch=22 step=22000[0m [36mepoch[0m=[35m22[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008577959537506103, 'time_algorithm_update': 0.007873429536819458, 'loss': 0.46285799227654933, 'td_loss': 0.05910828156908974, 'conservative_loss': 0.40374971069395543, 'time_step': 0.008904558181762695}[0m [36mstep[0m=[35m22000[0m



Epoch 23/30: 100%|██████████| 1000/1000 [00:09<00:00, 109.99it/s, loss=0.46, td_loss=0.0576, conservative_loss=0.403]

[2m2024-08-01 16:16.36[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161313: epoch=23 step=23000[0m [36mepoch[0m=[35m23[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008485224246978759, 'time_algorithm_update': 0.007927387237548829, 'loss': 0.46012935230135915, 'td_loss': 0.05746383021632209, 'conservative_loss': 0.4026655216962099, 'time_step': 0.008960615873336792}[0m [36mstep[0m=[35m23000[0m



Epoch 24/30: 100%|██████████| 1000/1000 [00:09<00:00, 109.79it/s, loss=0.455, td_loss=0.0586, conservative_loss=0.396]

[2m2024-08-01 16:16.45[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161313: epoch=24 step=24000[0m [36mepoch[0m=[35m24[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008548979759216309, 'time_algorithm_update': 0.007944607257843017, 'loss': 0.45454071585834027, 'td_loss': 0.05852437131013721, 'conservative_loss': 0.39601634405553343, 'time_step': 0.008978708505630493}[0m [36mstep[0m=[35m24000[0m



Epoch 25/30: 100%|██████████| 1000/1000 [00:08<00:00, 116.10it/s, loss=0.489, td_loss=0.0745, conservative_loss=0.415]

[2m2024-08-01 16:16.54[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161313: epoch=25 step=25000[0m [36mepoch[0m=[35m25[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008214426040649414, 'time_algorithm_update': 0.007526325225830078, 'loss': 0.48846862713992595, 'td_loss': 0.07426124786259607, 'conservative_loss': 0.4142073799818754, 'time_step': 0.008513535261154176}[0m [36mstep[0m=[35m25000[0m



Epoch 26/30: 100%|██████████| 1000/1000 [00:09<00:00, 104.35it/s, loss=0.485, td_loss=0.0712, conservative_loss=0.414]

[2m2024-08-01 16:17.04[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161313: epoch=26 step=26000[0m [36mepoch[0m=[35m26[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008749806880950928, 'time_algorithm_update': 0.008391840934753418, 'loss': 0.48474869434535506, 'td_loss': 0.07116906220512464, 'conservative_loss': 0.413579631999135, 'time_step': 0.009451950788497926}[0m [36mstep[0m=[35m26000[0m



Epoch 27/30: 100%|██████████| 1000/1000 [00:08<00:00, 119.26it/s, loss=0.483, td_loss=0.0708, conservative_loss=0.412]

[2m2024-08-01 16:17.12[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161313: epoch=27 step=27000[0m [36mepoch[0m=[35m27[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008087906837463379, 'time_algorithm_update': 0.007284331560134888, 'loss': 0.482260364279151, 'td_loss': 0.0706860516546294, 'conservative_loss': 0.41157431304454806, 'time_step': 0.00827080249786377}[0m [36mstep[0m=[35m27000[0m



Epoch 28/30: 100%|██████████| 1000/1000 [00:08<00:00, 111.25it/s, loss=0.473, td_loss=0.0699, conservative_loss=0.403]

[2m2024-08-01 16:17.21[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161313: epoch=28 step=28000[0m [36mepoch[0m=[35m28[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008499891757965087, 'time_algorithm_update': 0.007860074520111084, 'loss': 0.47330973060429093, 'td_loss': 0.06999959383765235, 'conservative_loss': 0.40331013636291024, 'time_step': 0.008881076335906983}[0m [36mstep[0m=[35m28000[0m



Epoch 29/30: 100%|██████████| 1000/1000 [00:08<00:00, 119.13it/s, loss=0.47, td_loss=0.0694, conservative_loss=0.401]

[2m2024-08-01 16:17.29[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161313: epoch=29 step=29000[0m [36mepoch[0m=[35m29[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008001492023468017, 'time_algorithm_update': 0.007323287725448609, 'loss': 0.4700115924924612, 'td_loss': 0.0693803212014027, 'conservative_loss': 0.4006312704980373, 'time_step': 0.008289604902267456}[0m [36mstep[0m=[35m29000[0m



Epoch 30/30: 100%|██████████| 1000/1000 [00:08<00:00, 113.37it/s, loss=0.481, td_loss=0.0717, conservative_loss=0.409]

[2m2024-08-01 16:17.38[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161313: epoch=30 step=30000[0m [36mepoch[0m=[35m30[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008470704555511475, 'time_algorithm_update': 0.007686447143554687, 'loss': 0.48014276097714903, 'td_loss': 0.07165742565132678, 'conservative_loss': 0.40848533555865285, 'time_step': 0.008708304405212402}[0m [36mstep[0m=[35m30000[0m





200
[2m2024-08-01 16:17.39[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('uint8')], shape=[(3, 7, 7)]), action_signature=Signature(dtype=[dtype('int64')], shape=[(1,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.DISCRETE: 2>, action_size=3)[0m
[2m2024-08-01 16:17.39[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/DiscreteCQL_20240801161739[0m
[2m2024-08-01 16:17.39[0m [[32m[1mdebug    [0m] [1mBuilding models...            [0m
[2m2024-08-01 16:17.39[0m [[32m[1mdebug    [0m] [1mModels have been built.       [0m
[2m2024-08-01 16:17.39[0m [[32m[1minfo     [0m] [1mParameters                    [0m [36mparams[0m=[35m{'observation_shape': [3, 7, 7], 'action_size': 3, 'config': {'type': 'discrete_cql', 'params': {'batch_size': 32, 'gamma': 0.99, 'observation_scaler': {'type': 'none', 'param

Epoch 1/30: 100%|██████████| 1000/1000 [00:08<00:00, 115.59it/s, loss=0.77, td_loss=0.0591, conservative_loss=0.711]

[2m2024-08-01 16:17.48[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161739: epoch=1 step=1000[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008235278129577637, 'time_algorithm_update': 0.007535090208053589, 'loss': 0.7688519340753556, 'td_loss': 0.059143070181831715, 'conservative_loss': 0.7097088649570942, 'time_step': 0.008533966064453126}[0m [36mstep[0m=[35m1000[0m



Epoch 2/30: 100%|██████████| 1000/1000 [00:08<00:00, 113.47it/s, loss=0.587, td_loss=0.0803, conservative_loss=0.507]

[2m2024-08-01 16:17.56[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161739: epoch=2 step=2000[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008470933437347412, 'time_algorithm_update': 0.007671458721160889, 'loss': 0.5864812502264977, 'td_loss': 0.080204686564859, 'conservative_loss': 0.5062765627801419, 'time_step': 0.008702311515808105}[0m [36mstep[0m=[35m2000[0m



Epoch 3/30: 100%|██████████| 1000/1000 [00:09<00:00, 109.83it/s, loss=0.556, td_loss=0.0815, conservative_loss=0.475]

[2m2024-08-01 16:18.06[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161739: epoch=3 step=3000[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008671014308929444, 'time_algorithm_update': 0.007929494857788086, 'loss': 0.5556710726320744, 'td_loss': 0.08133156225644052, 'conservative_loss': 0.474339510679245, 'time_step': 0.008975396633148194}[0m [36mstep[0m=[35m3000[0m



Epoch 4/30: 100%|██████████| 1000/1000 [00:08<00:00, 114.85it/s, loss=0.531, td_loss=0.0787, conservative_loss=0.452]

[2m2024-08-01 16:18.14[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161739: epoch=4 step=4000[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008421952724456787, 'time_algorithm_update': 0.007577330827713013, 'loss': 0.530916198477149, 'td_loss': 0.07880897073447704, 'conservative_loss': 0.4521072274744511, 'time_step': 0.008596297025680542}[0m [36mstep[0m=[35m4000[0m



Epoch 5/30: 100%|██████████| 1000/1000 [00:08<00:00, 112.30it/s, loss=0.516, td_loss=0.0785, conservative_loss=0.438]

[2m2024-08-01 16:18.23[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161739: epoch=5 step=5000[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008736016750335693, 'time_algorithm_update': 0.00773498010635376, 'loss': 0.5166843722313642, 'td_loss': 0.07874683279031888, 'conservative_loss': 0.4379375395178795, 'time_step': 0.008793220043182374}[0m [36mstep[0m=[35m5000[0m



Epoch 6/30: 100%|██████████| 1000/1000 [00:08<00:00, 114.46it/s, loss=0.52, td_loss=0.0771, conservative_loss=0.443]

[2m2024-08-01 16:18.32[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161739: epoch=6 step=6000[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000836564302444458, 'time_algorithm_update': 0.007613765001296997, 'loss': 0.5194216968566179, 'td_loss': 0.07698561902344227, 'conservative_loss': 0.44243607787787914, 'time_step': 0.008620251893997193}[0m [36mstep[0m=[35m6000[0m



Epoch 7/30: 100%|██████████| 1000/1000 [00:08<00:00, 115.40it/s, loss=0.516, td_loss=0.0765, conservative_loss=0.44]

[2m2024-08-01 16:18.41[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161739: epoch=7 step=7000[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008348405361175537, 'time_algorithm_update': 0.007569742679595947, 'loss': 0.5163830890953541, 'td_loss': 0.07648490006686189, 'conservative_loss': 0.43989818942546843, 'time_step': 0.008564601182937621}[0m [36mstep[0m=[35m7000[0m



Epoch 8/30: 100%|██████████| 1000/1000 [00:08<00:00, 112.44it/s, loss=0.511, td_loss=0.0754, conservative_loss=0.436]

[2m2024-08-01 16:18.50[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161739: epoch=8 step=8000[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008503096103668213, 'time_algorithm_update': 0.007742779493331909, 'loss': 0.5119968593269587, 'td_loss': 0.07571928443596698, 'conservative_loss': 0.4362775747925043, 'time_step': 0.008772490501403808}[0m [36mstep[0m=[35m8000[0m



Epoch 9/30: 100%|██████████| 1000/1000 [00:08<00:00, 111.79it/s, loss=0.512, td_loss=0.0756, conservative_loss=0.437]

[2m2024-08-01 16:18.58[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161739: epoch=9 step=9000[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008522551059722901, 'time_algorithm_update': 0.007800800561904907, 'loss': 0.5116064478307962, 'td_loss': 0.07530760460207239, 'conservative_loss': 0.4362988430559635, 'time_step': 0.008837136268615722}[0m [36mstep[0m=[35m9000[0m



Epoch 10/30: 100%|██████████| 1000/1000 [00:08<00:00, 117.28it/s, loss=0.511, td_loss=0.0736, conservative_loss=0.437]

[2m2024-08-01 16:19.07[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161739: epoch=10 step=10000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008174815177917481, 'time_algorithm_update': 0.0074430651664733885, 'loss': 0.510207177862525, 'td_loss': 0.07339409698895179, 'conservative_loss': 0.4368130812048912, 'time_step': 0.0084197998046875}[0m [36mstep[0m=[35m10000[0m



Epoch 11/30: 100%|██████████| 1000/1000 [00:08<00:00, 112.72it/s, loss=0.494, td_loss=0.0702, conservative_loss=0.423]


[2m2024-08-01 16:19.16[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161739: epoch=11 step=11000[0m [36mepoch[0m=[35m11[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008520052433013916, 'time_algorithm_update': 0.007731463432312012, 'loss': 0.49383742675185205, 'td_loss': 0.07036154874623753, 'conservative_loss': 0.4234758776426315, 'time_step': 0.008759551048278809}[0m [36mstep[0m=[35m11000[0m


Epoch 12/30: 100%|██████████| 1000/1000 [00:08<00:00, 113.22it/s, loss=0.509, td_loss=0.0727, conservative_loss=0.437]


[2m2024-08-01 16:19.25[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161739: epoch=12 step=12000[0m [36mepoch[0m=[35m12[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008510189056396484, 'time_algorithm_update': 0.0076929514408111575, 'loss': 0.50833596855402, 'td_loss': 0.07246481988183223, 'conservative_loss': 0.4358711488097906, 'time_step': 0.008723888158798218}[0m [36mstep[0m=[35m12000[0m


Epoch 13/30: 100%|██████████| 1000/1000 [00:08<00:00, 114.57it/s, loss=0.504, td_loss=0.0699, conservative_loss=0.434]

[2m2024-08-01 16:19.33[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161739: epoch=13 step=13000[0m [36mepoch[0m=[35m13[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008333973884582519, 'time_algorithm_update': 0.007620869874954223, 'loss': 0.5051886088848114, 'td_loss': 0.07027312980382704, 'conservative_loss': 0.4349154795855284, 'time_step': 0.008623602390289307}[0m [36mstep[0m=[35m13000[0m



Epoch 14/30: 100%|██████████| 1000/1000 [00:08<00:00, 112.46it/s, loss=0.501, td_loss=0.0702, conservative_loss=0.431]


[2m2024-08-01 16:19.42[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161739: epoch=14 step=14000[0m [36mepoch[0m=[35m14[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008540520668029785, 'time_algorithm_update': 0.007750698089599609, 'loss': 0.500483757480979, 'td_loss': 0.07003432706464081, 'conservative_loss': 0.4304494300186634, 'time_step': 0.008780410289764404}[0m [36mstep[0m=[35m14000[0m


Epoch 15/30: 100%|██████████| 1000/1000 [00:08<00:00, 112.62it/s, loss=0.495, td_loss=0.0699, conservative_loss=0.425]

[2m2024-08-01 16:19.51[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161739: epoch=15 step=15000[0m [36mepoch[0m=[35m15[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000841252326965332, 'time_algorithm_update': 0.007747438192367554, 'loss': 0.49470588359236717, 'td_loss': 0.06987602238892578, 'conservative_loss': 0.42482986085116864, 'time_step': 0.008762856721878052}[0m [36mstep[0m=[35m15000[0m



Epoch 16/30: 100%|██████████| 1000/1000 [00:08<00:00, 117.78it/s, loss=0.495, td_loss=0.0686, conservative_loss=0.427]

[2m2024-08-01 16:20.00[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161739: epoch=16 step=16000[0m [36mepoch[0m=[35m16[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008176734447479248, 'time_algorithm_update': 0.007401504993438721, 'loss': 0.49557877755165103, 'td_loss': 0.06865646932343952, 'conservative_loss': 0.42692230868339537, 'time_step': 0.008386014461517334}[0m [36mstep[0m=[35m16000[0m



Epoch 17/30: 100%|██████████| 1000/1000 [00:08<00:00, 113.89it/s, loss=0.503, td_loss=0.0817, conservative_loss=0.422]

[2m2024-08-01 16:20.09[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161739: epoch=17 step=17000[0m [36mepoch[0m=[35m17[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008448824882507324, 'time_algorithm_update': 0.007653316736221313, 'loss': 0.5030744658410549, 'td_loss': 0.08151045218668879, 'conservative_loss': 0.421564013376832, 'time_step': 0.00867151927947998}[0m [36mstep[0m=[35m17000[0m



Epoch 18/30: 100%|██████████| 1000/1000 [00:08<00:00, 111.37it/s, loss=0.504, td_loss=0.0759, conservative_loss=0.428]

[2m2024-08-01 16:20.18[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161739: epoch=18 step=18000[0m [36mepoch[0m=[35m18[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000864187240600586, 'time_algorithm_update': 0.0078088057041168215, 'loss': 0.5044450578242541, 'td_loss': 0.0759948728857562, 'conservative_loss': 0.42845018558204173, 'time_step': 0.008865605115890502}[0m [36mstep[0m=[35m18000[0m



Epoch 19/30: 100%|██████████| 1000/1000 [00:08<00:00, 116.34it/s, loss=0.5, td_loss=0.0742, conservative_loss=0.426] 

[2m2024-08-01 16:20.26[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161739: epoch=19 step=19000[0m [36mepoch[0m=[35m19[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008189802169799805, 'time_algorithm_update': 0.007479633808135986, 'loss': 0.5008351549357176, 'td_loss': 0.07456347779883071, 'conservative_loss': 0.4262716771960258, 'time_step': 0.008489609241485596}[0m [36mstep[0m=[35m19000[0m



Epoch 20/30: 100%|██████████| 1000/1000 [00:08<00:00, 113.25it/s, loss=0.493, td_loss=0.0731, conservative_loss=0.42]

[2m2024-08-01 16:20.35[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161739: epoch=20 step=20000[0m [36mepoch[0m=[35m20[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008403091430664063, 'time_algorithm_update': 0.0076939697265625, 'loss': 0.49362470860779284, 'td_loss': 0.07348364862822927, 'conservative_loss': 0.42014106011390684, 'time_step': 0.008709947347640992}[0m [36mstep[0m=[35m20000[0m



Epoch 21/30: 100%|██████████| 1000/1000 [00:08<00:00, 114.44it/s, loss=0.503, td_loss=0.0743, conservative_loss=0.429]

[2m2024-08-01 16:20.44[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161739: epoch=21 step=21000[0m [36mepoch[0m=[35m21[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008419225215911865, 'time_algorithm_update': 0.007613973379135132, 'loss': 0.5028050048053264, 'td_loss': 0.07416379237733782, 'conservative_loss': 0.42864121259748933, 'time_step': 0.008628541707992553}[0m [36mstep[0m=[35m21000[0m



Epoch 22/30: 100%|██████████| 1000/1000 [00:08<00:00, 116.07it/s, loss=0.505, td_loss=0.0753, conservative_loss=0.429]

[2m2024-08-01 16:20.52[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161739: epoch=22 step=22000[0m [36mepoch[0m=[35m22[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008350365161895752, 'time_algorithm_update': 0.007515012502670288, 'loss': 0.5045165941119194, 'td_loss': 0.07520072261837776, 'conservative_loss': 0.42931587153673173, 'time_step': 0.008513350009918213}[0m [36mstep[0m=[35m22000[0m



Epoch 23/30: 100%|██████████| 1000/1000 [00:08<00:00, 114.82it/s, loss=0.503, td_loss=0.0735, conservative_loss=0.43]


[2m2024-08-01 16:21.01[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161739: epoch=23 step=23000[0m [36mepoch[0m=[35m23[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008414301872253418, 'time_algorithm_update': 0.007566100597381592, 'loss': 0.5027723610252142, 'td_loss': 0.0732865682633128, 'conservative_loss': 0.42948579232394696, 'time_step': 0.008593925952911377}[0m [36mstep[0m=[35m23000[0m


Epoch 24/30: 100%|██████████| 1000/1000 [00:08<00:00, 112.36it/s, loss=0.503, td_loss=0.0734, conservative_loss=0.43]

[2m2024-08-01 16:21.10[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161739: epoch=24 step=24000[0m [36mepoch[0m=[35m24[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000861119031906128, 'time_algorithm_update': 0.007748783826828003, 'loss': 0.5030768349319696, 'td_loss': 0.07325439539179206, 'conservative_loss': 0.42982244010269643, 'time_step': 0.008785168170928954}[0m [36mstep[0m=[35m24000[0m



Epoch 25/30: 100%|██████████| 1000/1000 [00:08<00:00, 113.55it/s, loss=0.517, td_loss=0.0887, conservative_loss=0.429]

[2m2024-08-01 16:21.19[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161739: epoch=25 step=25000[0m [36mepoch[0m=[35m25[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008463315963745117, 'time_algorithm_update': 0.0076628150939941405, 'loss': 0.5171273855566978, 'td_loss': 0.08842105863895268, 'conservative_loss': 0.42870632648468016, 'time_step': 0.008691612720489503}[0m [36mstep[0m=[35m25000[0m



Epoch 26/30: 100%|██████████| 1000/1000 [00:08<00:00, 115.96it/s, loss=0.518, td_loss=0.0861, conservative_loss=0.432]

[2m2024-08-01 16:21.28[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161739: epoch=26 step=26000[0m [36mepoch[0m=[35m26[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008248748779296875, 'time_algorithm_update': 0.007497633695602417, 'loss': 0.517512579664588, 'td_loss': 0.08585333903320133, 'conservative_loss': 0.4316592400670052, 'time_step': 0.00850718355178833}[0m [36mstep[0m=[35m26000[0m



Epoch 27/30: 100%|██████████| 1000/1000 [00:08<00:00, 112.35it/s, loss=0.51, td_loss=0.0836, conservative_loss=0.426]

[2m2024-08-01 16:21.36[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161739: epoch=27 step=27000[0m [36mepoch[0m=[35m27[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008564937114715576, 'time_algorithm_update': 0.007747988224029541, 'loss': 0.5102343603670597, 'td_loss': 0.08382536232797429, 'conservative_loss': 0.42640899781882763, 'time_step': 0.008793168306350707}[0m [36mstep[0m=[35m27000[0m



Epoch 28/30: 100%|██████████| 1000/1000 [00:08<00:00, 112.37it/s, loss=0.511, td_loss=0.0864, conservative_loss=0.425]

[2m2024-08-01 16:21.45[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161739: epoch=28 step=28000[0m [36mepoch[0m=[35m28[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008376364707946778, 'time_algorithm_update': 0.0077827627658844, 'loss': 0.5115349235832691, 'td_loss': 0.08642843560222536, 'conservative_loss': 0.42510648810863494, 'time_step': 0.008794229984283447}[0m [36mstep[0m=[35m28000[0m



Epoch 29/30: 100%|██████████| 1000/1000 [00:08<00:00, 120.00it/s, loss=0.52, td_loss=0.085, conservative_loss=0.435] 

[2m2024-08-01 16:21.54[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161739: epoch=29 step=29000[0m [36mepoch[0m=[35m29[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008114156723022461, 'time_algorithm_update': 0.007260982036590576, 'loss': 0.5196899053901434, 'td_loss': 0.08507641743030399, 'conservative_loss': 0.43461348804831507, 'time_step': 0.008232985258102416}[0m [36mstep[0m=[35m29000[0m



Epoch 30/30: 100%|██████████| 1000/1000 [00:08<00:00, 114.27it/s, loss=0.512, td_loss=0.0862, conservative_loss=0.426]

[2m2024-08-01 16:22.02[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801161739: epoch=30 step=30000[0m [36mepoch[0m=[35m30[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000841825008392334, 'time_algorithm_update': 0.007632646322250366, 'loss': 0.5115212853699923, 'td_loss': 0.08618052266398445, 'conservative_loss': 0.4253407636731863, 'time_step': 0.00864280104637146}[0m [36mstep[0m=[35m30000[0m





400
[2m2024-08-01 16:22.03[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('uint8')], shape=[(3, 7, 7)]), action_signature=Signature(dtype=[dtype('int64')], shape=[(1,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.DISCRETE: 2>, action_size=3)[0m
[2m2024-08-01 16:22.03[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/DiscreteCQL_20240801162203[0m
[2m2024-08-01 16:22.03[0m [[32m[1mdebug    [0m] [1mBuilding models...            [0m
[2m2024-08-01 16:22.03[0m [[32m[1mdebug    [0m] [1mModels have been built.       [0m
[2m2024-08-01 16:22.03[0m [[32m[1minfo     [0m] [1mParameters                    [0m [36mparams[0m=[35m{'observation_shape': [3, 7, 7], 'action_size': 3, 'config': {'type': 'discrete_cql', 'params': {'batch_size': 32, 'gamma': 0.99, 'observation_scaler': {'type': 'none', 'param

Epoch 1/30: 100%|██████████| 1000/1000 [00:09<00:00, 110.87it/s, loss=0.704, td_loss=0.0766, conservative_loss=0.627]


[2m2024-08-01 16:22.12[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801162203: epoch=1 step=1000[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008666086196899414, 'time_algorithm_update': 0.007849449396133423, 'loss': 0.7032287154793739, 'td_loss': 0.07676548110879958, 'conservative_loss': 0.6264632341563702, 'time_step': 0.008905350685119629}[0m [36mstep[0m=[35m1000[0m


Epoch 2/30: 100%|██████████| 1000/1000 [00:08<00:00, 113.48it/s, loss=0.571, td_loss=0.0899, conservative_loss=0.481]

[2m2024-08-01 16:22.21[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801162203: epoch=2 step=2000[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008617565631866455, 'time_algorithm_update': 0.007662061929702759, 'loss': 0.5710143171995878, 'td_loss': 0.08988047680724412, 'conservative_loss': 0.48113384094834327, 'time_step': 0.008698796987533569}[0m [36mstep[0m=[35m2000[0m



Epoch 3/30: 100%|██████████| 1000/1000 [00:08<00:00, 114.76it/s, loss=0.558, td_loss=0.0903, conservative_loss=0.467]

[2m2024-08-01 16:22.30[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801162203: epoch=3 step=3000[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008448190689086914, 'time_algorithm_update': 0.007569063663482666, 'loss': 0.5572121232450008, 'td_loss': 0.09015449965791776, 'conservative_loss': 0.46705762308835985, 'time_step': 0.008605551719665528}[0m [36mstep[0m=[35m3000[0m



Epoch 4/30: 100%|██████████| 1000/1000 [00:08<00:00, 115.09it/s, loss=0.55, td_loss=0.0896, conservative_loss=0.46] 

[2m2024-08-01 16:22.38[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801162203: epoch=4 step=4000[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008543326854705811, 'time_algorithm_update': 0.0075441417694091795, 'loss': 0.5495480898171663, 'td_loss': 0.08954941464588045, 'conservative_loss': 0.4599986754208803, 'time_step': 0.008582873821258545}[0m [36mstep[0m=[35m4000[0m



Epoch 5/30: 100%|██████████| 1000/1000 [00:08<00:00, 113.64it/s, loss=0.54, td_loss=0.0896, conservative_loss=0.451]

[2m2024-08-01 16:22.47[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801162203: epoch=5 step=5000[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008633944988250732, 'time_algorithm_update': 0.007670298099517823, 'loss': 0.5406446936577558, 'td_loss': 0.08961166596552357, 'conservative_loss': 0.4510330284535885, 'time_step': 0.008694910049438476}[0m [36mstep[0m=[35m5000[0m



Epoch 6/30: 100%|██████████| 1000/1000 [00:08<00:00, 116.46it/s, loss=0.542, td_loss=0.0865, conservative_loss=0.456]

[2m2024-08-01 16:22.56[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801162203: epoch=6 step=6000[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008365883827209473, 'time_algorithm_update': 0.007467109918594361, 'loss': 0.54274915368855, 'td_loss': 0.08662896694522351, 'conservative_loss': 0.45612018717825414, 'time_step': 0.008479439258575439}[0m [36mstep[0m=[35m6000[0m



Epoch 7/30: 100%|██████████| 1000/1000 [00:08<00:00, 112.47it/s, loss=0.544, td_loss=0.0874, conservative_loss=0.457]

[2m2024-08-01 16:23.05[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801162203: epoch=7 step=7000[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008541722297668457, 'time_algorithm_update': 0.007734746217727661, 'loss': 0.5433587558716536, 'td_loss': 0.08710274185752497, 'conservative_loss': 0.4562560140788555, 'time_step': 0.008770868062973022}[0m [36mstep[0m=[35m7000[0m



Epoch 8/30: 100%|██████████| 1000/1000 [00:08<00:00, 115.45it/s, loss=0.535, td_loss=0.0872, conservative_loss=0.448]

[2m2024-08-01 16:23.13[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801162203: epoch=8 step=8000[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000845930814743042, 'time_algorithm_update': 0.00755318284034729, 'loss': 0.5346636853069067, 'td_loss': 0.08705498884385451, 'conservative_loss': 0.44760869635641576, 'time_step': 0.008557019710540771}[0m [36mstep[0m=[35m8000[0m



Epoch 9/30: 100%|██████████| 1000/1000 [00:08<00:00, 114.31it/s, loss=0.531, td_loss=0.0818, conservative_loss=0.449]


[2m2024-08-01 16:23.22[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801162203: epoch=9 step=9000[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008503818511962891, 'time_algorithm_update': 0.007595994234085083, 'loss': 0.5312496867775917, 'td_loss': 0.08188660457823425, 'conservative_loss': 0.4493630827665329, 'time_step': 0.008626733779907227}[0m [36mstep[0m=[35m9000[0m


Epoch 10/30: 100%|██████████| 1000/1000 [00:08<00:00, 111.99it/s, loss=0.534, td_loss=0.0789, conservative_loss=0.455]

[2m2024-08-01 16:23.31[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801162203: epoch=10 step=10000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008744444847106934, 'time_algorithm_update': 0.00776308012008667, 'loss': 0.5337957851439714, 'td_loss': 0.07870227258233353, 'conservative_loss': 0.4550935119390488, 'time_step': 0.008818922758102418}[0m [36mstep[0m=[35m10000[0m



Epoch 11/30: 100%|██████████| 1000/1000 [00:08<00:00, 112.34it/s, loss=0.53, td_loss=0.0761, conservative_loss=0.454]

[2m2024-08-01 16:23.40[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801162203: epoch=11 step=11000[0m [36mepoch[0m=[35m11[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008594613075256348, 'time_algorithm_update': 0.007771507024765014, 'loss': 0.5295810654163361, 'td_loss': 0.0756831835382618, 'conservative_loss': 0.4538978817015886, 'time_step': 0.008794325113296508}[0m [36mstep[0m=[35m11000[0m



Epoch 12/30: 100%|██████████| 1000/1000 [00:08<00:00, 115.66it/s, loss=0.526, td_loss=0.0759, conservative_loss=0.45]

[2m2024-08-01 16:23.49[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801162203: epoch=12 step=12000[0m [36mepoch[0m=[35m12[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008363077640533447, 'time_algorithm_update': 0.0075298469066619874, 'loss': 0.5257240241914988, 'td_loss': 0.07568444425007329, 'conservative_loss': 0.4500395803451538, 'time_step': 0.008537495136260986}[0m [36mstep[0m=[35m12000[0m



Epoch 13/30: 100%|██████████| 1000/1000 [00:08<00:00, 114.39it/s, loss=0.528, td_loss=0.0764, conservative_loss=0.452]

[2m2024-08-01 16:23.57[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801162203: epoch=13 step=13000[0m [36mepoch[0m=[35m13[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000841195821762085, 'time_algorithm_update': 0.007614396333694458, 'loss': 0.5274209016412497, 'td_loss': 0.0763373363171704, 'conservative_loss': 0.451083565607667, 'time_step': 0.008638159275054931}[0m [36mstep[0m=[35m13000[0m



Epoch 14/30: 100%|██████████| 1000/1000 [00:08<00:00, 116.60it/s, loss=0.527, td_loss=0.0768, conservative_loss=0.45]

[2m2024-08-01 16:24.06[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801162203: epoch=14 step=14000[0m [36mepoch[0m=[35m14[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00082193922996521, 'time_algorithm_update': 0.007470423698425293, 'loss': 0.5274216363877058, 'td_loss': 0.07687654418731109, 'conservative_loss': 0.4505450918674469, 'time_step': 0.008468990325927734}[0m [36mstep[0m=[35m14000[0m



Epoch 15/30: 100%|██████████| 1000/1000 [00:08<00:00, 111.52it/s, loss=0.53, td_loss=0.0773, conservative_loss=0.453]


[2m2024-08-01 16:24.15[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801162203: epoch=15 step=15000[0m [36mepoch[0m=[35m15[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008689069747924805, 'time_algorithm_update': 0.007815367698669433, 'loss': 0.5304986265450716, 'td_loss': 0.07741928764479235, 'conservative_loss': 0.4530793388932943, 'time_step': 0.00885663890838623}[0m [36mstep[0m=[35m15000[0m


Epoch 16/30: 100%|██████████| 1000/1000 [00:08<00:00, 114.36it/s, loss=0.521, td_loss=0.0763, conservative_loss=0.445]

[2m2024-08-01 16:24.24[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801162203: epoch=16 step=16000[0m [36mepoch[0m=[35m16[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008478059768676758, 'time_algorithm_update': 0.0076091506481170655, 'loss': 0.5211047282665968, 'td_loss': 0.07631170895509422, 'conservative_loss': 0.444793019130826, 'time_step': 0.008632227182388305}[0m [36mstep[0m=[35m16000[0m



Epoch 17/30: 100%|██████████| 1000/1000 [00:08<00:00, 113.41it/s, loss=0.536, td_loss=0.088, conservative_loss=0.448]

[2m2024-08-01 16:24.33[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801162203: epoch=17 step=17000[0m [36mepoch[0m=[35m17[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008565804958343506, 'time_algorithm_update': 0.007666675090789795, 'loss': 0.5351952757239342, 'td_loss': 0.08792983926739543, 'conservative_loss': 0.44726543658971785, 'time_step': 0.008697413682937622}[0m [36mstep[0m=[35m17000[0m



Epoch 18/30: 100%|██████████| 1000/1000 [00:08<00:00, 115.69it/s, loss=0.532, td_loss=0.0838, conservative_loss=0.448]

[2m2024-08-01 16:24.41[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801162203: epoch=18 step=18000[0m [36mepoch[0m=[35m18[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008488125801086426, 'time_algorithm_update': 0.007516336441040039, 'loss': 0.5315471820384264, 'td_loss': 0.0836880150823854, 'conservative_loss': 0.4478591671735048, 'time_step': 0.008531103134155274}[0m [36mstep[0m=[35m18000[0m



Epoch 19/30: 100%|██████████| 1000/1000 [00:08<00:00, 115.41it/s, loss=0.529, td_loss=0.0822, conservative_loss=0.447]


[2m2024-08-01 16:24.50[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801162203: epoch=19 step=19000[0m [36mepoch[0m=[35m19[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00083681058883667, 'time_algorithm_update': 0.007535736560821533, 'loss': 0.5293782604634761, 'td_loss': 0.08213447401998565, 'conservative_loss': 0.4472437856048346, 'time_step': 0.008549809217453004}[0m [36mstep[0m=[35m19000[0m


Epoch 20/30: 100%|██████████| 1000/1000 [00:08<00:00, 113.61it/s, loss=0.516, td_loss=0.0802, conservative_loss=0.436]

[2m2024-08-01 16:24.59[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801162203: epoch=20 step=20000[0m [36mepoch[0m=[35m20[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008533623218536377, 'time_algorithm_update': 0.007657093524932861, 'loss': 0.5164915316551923, 'td_loss': 0.08054165616771206, 'conservative_loss': 0.43594987542927266, 'time_step': 0.008687647819519043}[0m [36mstep[0m=[35m20000[0m



Epoch 21/30: 100%|██████████| 1000/1000 [00:08<00:00, 115.19it/s, loss=0.528, td_loss=0.0815, conservative_loss=0.446]

[2m2024-08-01 16:25.07[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801162203: epoch=21 step=21000[0m [36mepoch[0m=[35m21[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008425378799438477, 'time_algorithm_update': 0.007558364629745484, 'loss': 0.5278724130243063, 'td_loss': 0.08147777124214918, 'conservative_loss': 0.44639464175701143, 'time_step': 0.008574168920516968}[0m [36mstep[0m=[35m21000[0m



Epoch 22/30: 100%|██████████| 1000/1000 [00:10<00:00, 99.94it/s, loss=0.524, td_loss=0.0801, conservative_loss=0.444]

[2m2024-08-01 16:25.17[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801162203: epoch=22 step=22000[0m [36mepoch[0m=[35m22[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009400186538696289, 'time_algorithm_update': 0.008769512176513672, 'loss': 0.5233934893459081, 'td_loss': 0.0797929717944935, 'conservative_loss': 0.44360051648318766, 'time_step': 0.009882164478302003}[0m [36mstep[0m=[35m22000[0m



Epoch 23/30: 100%|██████████| 1000/1000 [00:08<00:00, 113.91it/s, loss=0.528, td_loss=0.0797, conservative_loss=0.448]

[2m2024-08-01 16:25.26[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801162203: epoch=23 step=23000[0m [36mepoch[0m=[35m23[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007980716228485107, 'time_algorithm_update': 0.0077346408367156986, 'loss': 0.5276907761245966, 'td_loss': 0.07978580603608862, 'conservative_loss': 0.44790496933460233, 'time_step': 0.008685540914535522}[0m [36mstep[0m=[35m23000[0m



Epoch 24/30: 100%|██████████| 1000/1000 [00:08<00:00, 123.98it/s, loss=0.516, td_loss=0.0802, conservative_loss=0.436]

[2m2024-08-01 16:25.34[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801162203: epoch=24 step=24000[0m [36mepoch[0m=[35m24[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007851958274841308, 'time_algorithm_update': 0.007018758296966553, 'loss': 0.5163551093041897, 'td_loss': 0.08006291145831347, 'conservative_loss': 0.43629219742119313, 'time_step': 0.00796960186958313}[0m [36mstep[0m=[35m24000[0m



Epoch 25/30: 100%|██████████| 1000/1000 [00:08<00:00, 119.71it/s, loss=0.535, td_loss=0.0944, conservative_loss=0.44]

[2m2024-08-01 16:25.43[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801162203: epoch=25 step=25000[0m [36mepoch[0m=[35m25[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008065609931945801, 'time_algorithm_update': 0.007254520893096924, 'loss': 0.5357594655752183, 'td_loss': 0.09485407948400826, 'conservative_loss': 0.4409053859561682, 'time_step': 0.00824982190132141}[0m [36mstep[0m=[35m25000[0m



Epoch 26/30: 100%|██████████| 1000/1000 [00:08<00:00, 116.89it/s, loss=0.53, td_loss=0.0911, conservative_loss=0.439]

[2m2024-08-01 16:25.51[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801162203: epoch=26 step=26000[0m [36mepoch[0m=[35m26[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008297355175018311, 'time_algorithm_update': 0.00746721887588501, 'loss': 0.5296406246125698, 'td_loss': 0.09073675579810515, 'conservative_loss': 0.4389038686901331, 'time_step': 0.008455481767654418}[0m [36mstep[0m=[35m26000[0m



Epoch 27/30: 100%|██████████| 1000/1000 [00:08<00:00, 118.36it/s, loss=0.538, td_loss=0.0922, conservative_loss=0.445]

[2m2024-08-01 16:26.00[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801162203: epoch=27 step=27000[0m [36mepoch[0m=[35m27[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008245620727539062, 'time_algorithm_update': 0.0073492972850799565, 'loss': 0.5373070156425238, 'td_loss': 0.09218930001417175, 'conservative_loss': 0.44511771634221076, 'time_step': 0.008347576379776001}[0m [36mstep[0m=[35m27000[0m



Epoch 28/30: 100%|██████████| 1000/1000 [00:08<00:00, 112.90it/s, loss=0.535, td_loss=0.0914, conservative_loss=0.443]

[2m2024-08-01 16:26.09[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801162203: epoch=28 step=28000[0m [36mepoch[0m=[35m28[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008442440032958985, 'time_algorithm_update': 0.007743952512741089, 'loss': 0.535133553981781, 'td_loss': 0.09167463955725544, 'conservative_loss': 0.44345891404151916, 'time_step': 0.00875566601753235}[0m [36mstep[0m=[35m28000[0m



Epoch 29/30: 100%|██████████| 1000/1000 [00:08<00:00, 114.49it/s, loss=0.52, td_loss=0.0887, conservative_loss=0.431]


[2m2024-08-01 16:26.17[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801162203: epoch=29 step=29000[0m [36mepoch[0m=[35m29[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008339095115661621, 'time_algorithm_update': 0.0076051573753356935, 'loss': 0.5203960787206888, 'td_loss': 0.089134484780021, 'conservative_loss': 0.4312615935206413, 'time_step': 0.008613946199417114}[0m [36mstep[0m=[35m29000[0m


Epoch 30/30: 100%|██████████| 1000/1000 [00:08<00:00, 112.20it/s, loss=0.522, td_loss=0.0903, conservative_loss=0.432]

[2m2024-08-01 16:26.26[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801162203: epoch=30 step=30000[0m [36mepoch[0m=[35m30[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008354918956756591, 'time_algorithm_update': 0.007788127183914185, 'loss': 0.5227649859637022, 'td_loss': 0.09048336219601333, 'conservative_loss': 0.43228162413835525, 'time_step': 0.008800947189331054}[0m [36mstep[0m=[35m30000[0m





True

In [6]:
# Train one DiscreteBCQ model per 5x5 Gridworld dataset size and save each
# result under a file name that embeds the episode count.
# The episode count drives both the dataset path and the save name, so it is
# kept in one place instead of being repeated in copy-pasted stanzas.
BCQ_5X5_EPISODE_COUNTS = (50, 100, 200, 400)

for bcq_episode_count in BCQ_5X5_EPISODE_COUNTS:
    dataset = load_dataset(get_5x5_dataset_path(bcq_episode_count))
    # Build a new model for every dataset size; each run gets its own instance.
    model = get_BCQ_model()
    # train_and_save prints the number of episodes, fits the model and writes
    # it to the given file; its boolean return value is not needed here.
    train_and_save(dataset, model, f'BCQ_Gridworld5x5_{bcq_episode_count}Episode.d3')

50
[2m2024-08-01 16:26.27[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('uint8')], shape=[(3, 7, 7)]), action_signature=Signature(dtype=[dtype('int64')], shape=[(1,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.DISCRETE: 2>, action_size=3)[0m
[2m2024-08-01 16:26.27[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/DiscreteBCQ_20240801162627[0m
[2m2024-08-01 16:26.27[0m [[32m[1mdebug    [0m] [1mBuilding models...            [0m
[2m2024-08-01 16:26.35[0m [[32m[1mdebug    [0m] [1mModels have been built.       [0m
[2m2024-08-01 16:26.35[0m [[32m[1minfo     [0m] [1mParameters                    [0m [36mparams[0m=[35m{'observation_shape': [3, 7, 7], 'action_size': 3, 'config': {'type': 'discrete_bcq', 'params': {'batch_size': 32, 'gamma': 0.99, 'observation_scaler': {'type': 'none', 'params

Epoch 1/30: 100%|██████████| 1000/1000 [00:11<00:00, 88.80it/s, loss=1.56, td_loss=0.0158, imitator_loss=1.55]


[2m2024-08-01 16:26.46[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801162627: epoch=1 step=1000[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007605648040771484, 'time_algorithm_update': 0.01022994613647461, 'loss': 1.5643276439905167, 'td_loss': 0.015692894240168242, 'imitator_loss': 1.5486347502470017, 'time_step': 0.011152668952941895}[0m [36mstep[0m=[35m1000[0m


Epoch 2/30: 100%|██████████| 1000/1000 [00:10<00:00, 98.53it/s, loss=1.5, td_loss=0.000208, imitator_loss=1.5]

[2m2024-08-01 16:26.56[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801162627: epoch=2 step=2000[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008279585838317871, 'time_algorithm_update': 0.009025420665740967, 'loss': 1.4978576211929322, 'td_loss': 0.00020777439022458567, 'imitator_loss': 1.497649847149849, 'time_step': 0.010025342226028442}[0m [36mstep[0m=[35m2000[0m



Epoch 3/30: 100%|██████████| 1000/1000 [00:10<00:00, 96.95it/s, loss=1.49, td_loss=0.00014, imitator_loss=1.49] 

[2m2024-08-01 16:27.07[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801162627: epoch=3 step=3000[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008379461765289307, 'time_algorithm_update': 0.009158750295639038, 'loss': 1.4946530669927598, 'td_loss': 0.00014000915336828256, 'imitator_loss': 1.4945130579471588, 'time_step': 0.010164555549621581}[0m [36mstep[0m=[35m3000[0m



Epoch 4/30: 100%|██████████| 1000/1000 [00:09<00:00, 103.50it/s, loss=1.49, td_loss=0.000113, imitator_loss=1.49]

[2m2024-08-01 16:27.16[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801162627: epoch=4 step=4000[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008031454086303711, 'time_algorithm_update': 0.008578181266784668, 'loss': 1.4917414392232895, 'td_loss': 0.00011318075268627581, 'imitator_loss': 1.4916282569169999, 'time_step': 0.009546852111816407}[0m [36mstep[0m=[35m4000[0m



Epoch 5/30: 100%|██████████| 1000/1000 [00:09<00:00, 103.23it/s, loss=1.49, td_loss=8.93e-5, imitator_loss=1.49]

[2m2024-08-01 16:27.26[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801162627: epoch=5 step=5000[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008038487434387207, 'time_algorithm_update': 0.008572405099868775, 'loss': 1.491002499461174, 'td_loss': 8.904398612821752e-05, 'imitator_loss': 1.490913456439972, 'time_step': 0.009561312437057495}[0m [36mstep[0m=[35m5000[0m



Epoch 6/30: 100%|██████████| 1000/1000 [00:09<00:00, 101.62it/s, loss=1.49, td_loss=8.38e-5, imitator_loss=1.49]


[2m2024-08-01 16:27.36[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801162627: epoch=6 step=6000[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008126885890960693, 'time_algorithm_update': 0.008712884902954101, 'loss': 1.4893867861032486, 'td_loss': 8.370952054610825e-05, 'imitator_loss': 1.489303075313568, 'time_step': 0.009711071968078614}[0m [36mstep[0m=[35m6000[0m


Epoch 7/30: 100%|██████████| 1000/1000 [00:10<00:00, 99.58it/s, loss=1.49, td_loss=7.17e-5, imitator_loss=1.49]

[2m2024-08-01 16:27.46[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801162627: epoch=7 step=7000[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000825923204421997, 'time_algorithm_update': 0.008917698383331299, 'loss': 1.4884228872060776, 'td_loss': 7.149591250708909e-05, 'imitator_loss': 1.4883513888120652, 'time_step': 0.009917541742324829}[0m [36mstep[0m=[35m7000[0m



Epoch 8/30: 100%|██████████| 1000/1000 [00:09<00:00, 100.73it/s, loss=1.49, td_loss=6.95e-5, imitator_loss=1.49]


[2m2024-08-01 16:27.56[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801162627: epoch=8 step=8000[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008315768241882324, 'time_algorithm_update': 0.008793568134307862, 'loss': 1.4897221888303758, 'td_loss': 6.942863097492591e-05, 'imitator_loss': 1.4896527590751647, 'time_step': 0.009804904222488404}[0m [36mstep[0m=[35m8000[0m


Epoch 9/30: 100%|██████████| 1000/1000 [00:09<00:00, 103.38it/s, loss=1.5, td_loss=0.00657, imitator_loss=1.49]


[2m2024-08-01 16:28.06[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801162627: epoch=9 step=9000[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008057959079742432, 'time_algorithm_update': 0.008572929859161377, 'loss': 1.4965257736444473, 'td_loss': 0.006516663435111696, 'imitator_loss': 1.4900091100931168, 'time_step': 0.009545905113220215}[0m [36mstep[0m=[35m9000[0m


Epoch 10/30: 100%|██████████| 1000/1000 [00:09<00:00, 100.12it/s, loss=1.49, td_loss=0.00026, imitator_loss=1.49]


[2m2024-08-01 16:28.16[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801162627: epoch=10 step=10000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008166697025299072, 'time_algorithm_update': 0.008865940809249878, 'loss': 1.4901905571222305, 'td_loss': 0.0002600856842946087, 'imitator_loss': 1.4899304723739624, 'time_step': 0.009856194972991943}[0m [36mstep[0m=[35m10000[0m


Epoch 11/30: 100%|██████████| 1000/1000 [00:09<00:00, 103.17it/s, loss=1.49, td_loss=0.000171, imitator_loss=1.49]

[2m2024-08-01 16:28.25[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801162627: epoch=11 step=11000[0m [36mepoch[0m=[35m11[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000791985034942627, 'time_algorithm_update': 0.008624340534210205, 'loss': 1.488644138932228, 'td_loss': 0.0001703159514527215, 'imitator_loss': 1.4884738199710845, 'time_step': 0.009579389810562134}[0m [36mstep[0m=[35m11000[0m



Epoch 12/30: 100%|██████████| 1000/1000 [00:10<00:00, 99.75it/s, loss=1.49, td_loss=0.000147, imitator_loss=1.49]

[2m2024-08-01 16:28.35[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801162627: epoch=12 step=12000[0m [36mepoch[0m=[35m12[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008285112380981445, 'time_algorithm_update': 0.008891579389572144, 'loss': 1.4873961956501007, 'td_loss': 0.0001470235501692514, 'imitator_loss': 1.4872491719722747, 'time_step': 0.009896143198013305}[0m [36mstep[0m=[35m12000[0m



Epoch 13/30: 100%|██████████| 1000/1000 [00:09<00:00, 101.14it/s, loss=1.49, td_loss=0.000127, imitator_loss=1.49]

[2m2024-08-01 16:28.45[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801162627: epoch=13 step=13000[0m [36mepoch[0m=[35m13[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008162617683410645, 'time_algorithm_update': 0.00879040002822876, 'loss': 1.4894905359745025, 'td_loss': 0.00012716393308983244, 'imitator_loss': 1.4893633728027345, 'time_step': 0.009769215822219849}[0m [36mstep[0m=[35m13000[0m



Epoch 14/30: 100%|██████████| 1000/1000 [00:09<00:00, 101.64it/s, loss=1.49, td_loss=0.000116, imitator_loss=1.49]

[2m2024-08-01 16:28.55[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801162627: epoch=14 step=14000[0m [36mepoch[0m=[35m14[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008193342685699462, 'time_algorithm_update': 0.00872951340675354, 'loss': 1.487857889533043, 'td_loss': 0.0001164820076119213, 'imitator_loss': 1.487741405248642, 'time_step': 0.009720471620559692}[0m [36mstep[0m=[35m14000[0m



Epoch 15/30: 100%|██████████| 1000/1000 [00:09<00:00, 102.56it/s, loss=1.49, td_loss=0.000106, imitator_loss=1.49]

[2m2024-08-01 16:29.05[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801162627: epoch=15 step=15000[0m [36mepoch[0m=[35m15[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008070759773254394, 'time_algorithm_update': 0.008631074905395508, 'loss': 1.4897699233293533, 'td_loss': 0.00010566695266425086, 'imitator_loss': 1.489664256811142, 'time_step': 0.009624111413955689}[0m [36mstep[0m=[35m15000[0m



Epoch 16/30: 100%|██████████| 1000/1000 [00:10<00:00, 98.17it/s, loss=1.49, td_loss=9.95e-5, imitator_loss=1.49] 

[2m2024-08-01 16:29.15[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801162627: epoch=16 step=16000[0m [36mepoch[0m=[35m16[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008356871604919434, 'time_algorithm_update': 0.009026402473449708, 'loss': 1.488454848408699, 'td_loss': 9.937394099142694e-05, 'imitator_loss': 1.4883554754257202, 'time_step': 0.010040579319000244}[0m [36mstep[0m=[35m16000[0m



Epoch 17/30: 100%|██████████| 1000/1000 [00:09<00:00, 100.92it/s, loss=1.52, td_loss=0.029, imitator_loss=1.49]


[2m2024-08-01 16:29.25[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801162627: epoch=17 step=17000[0m [36mepoch[0m=[35m17[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008237509727478027, 'time_algorithm_update': 0.00878715467453003, 'loss': 1.5187001087665557, 'td_loss': 0.029081431109458208, 'imitator_loss': 1.4896186788082122, 'time_step': 0.009782017230987549}[0m [36mstep[0m=[35m17000[0m


Epoch 18/30: 100%|██████████| 1000/1000 [00:09<00:00, 107.79it/s, loss=1.5, td_loss=0.0122, imitator_loss=1.49]

[2m2024-08-01 16:29.34[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801162627: epoch=18 step=18000[0m [36mepoch[0m=[35m18[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00079911208152771, 'time_algorithm_update': 0.008160416841506958, 'loss': 1.499464121222496, 'td_loss': 0.012241613435529871, 'imitator_loss': 1.4872225073575973, 'time_step': 0.009151515960693359}[0m [36mstep[0m=[35m18000[0m



Epoch 19/30: 100%|██████████| 1000/1000 [00:09<00:00, 106.78it/s, loss=1.5, td_loss=0.00976, imitator_loss=1.49]

[2m2024-08-01 16:29.44[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801162627: epoch=19 step=19000[0m [36mepoch[0m=[35m19[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007872626781463623, 'time_algorithm_update': 0.00830080509185791, 'loss': 1.4985473741292954, 'td_loss': 0.009681980236098753, 'imitator_loss': 1.488865393638611, 'time_step': 0.009253607034683227}[0m [36mstep[0m=[35m19000[0m



Epoch 20/30: 100%|██████████| 1000/1000 [00:09<00:00, 102.52it/s, loss=1.49, td_loss=0.00221, imitator_loss=1.49]


[2m2024-08-01 16:29.53[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801162627: epoch=20 step=20000[0m [36mepoch[0m=[35m20[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007861042022705078, 'time_algorithm_update': 0.008670506477355957, 'loss': 1.4905934377908707, 'td_loss': 0.002284221017393065, 'imitator_loss': 1.4883092174530028, 'time_step': 0.00962740659713745}[0m [36mstep[0m=[35m20000[0m


Epoch 21/30: 100%|██████████| 1000/1000 [00:10<00:00, 98.62it/s, loss=1.5, td_loss=0.00732, imitator_loss=1.49] 

[2m2024-08-01 16:30.04[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801162627: epoch=21 step=21000[0m [36mepoch[0m=[35m21[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008311114311218262, 'time_algorithm_update': 0.009006997346878052, 'loss': 1.497376716852188, 'td_loss': 0.007262848668109655, 'imitator_loss': 1.4901138688325881, 'time_step': 0.010018608808517456}[0m [36mstep[0m=[35m21000[0m



Epoch 22/30: 100%|██████████| 1000/1000 [00:10<00:00, 97.10it/s, loss=1.49, td_loss=0.00636, imitator_loss=1.49]

[2m2024-08-01 16:30.14[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801162627: epoch=22 step=22000[0m [36mepoch[0m=[35m22[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008390693664550781, 'time_algorithm_update': 0.009144517421722412, 'loss': 1.4939215255975724, 'td_loss': 0.006332530200839756, 'imitator_loss': 1.4875889950990677, 'time_step': 0.010160985708236694}[0m [36mstep[0m=[35m22000[0m



Epoch 23/30: 100%|██████████| 1000/1000 [00:09<00:00, 102.20it/s, loss=1.49, td_loss=0.00602, imitator_loss=1.49]

[2m2024-08-01 16:30.24[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801162627: epoch=23 step=23000[0m [36mepoch[0m=[35m23[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008290352821350098, 'time_algorithm_update': 0.008680301666259766, 'loss': 1.4922582899332046, 'td_loss': 0.005975537876922317, 'imitator_loss': 1.4862827526330948, 'time_step': 0.009677645683288574}[0m [36mstep[0m=[35m23000[0m



Epoch 24/30: 100%|██████████| 1000/1000 [00:10<00:00, 98.94it/s, loss=1.49, td_loss=0.00392, imitator_loss=1.49] 

[2m2024-08-01 16:30.34[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801162627: epoch=24 step=24000[0m [36mepoch[0m=[35m24[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008255100250244141, 'time_algorithm_update': 0.009006294488906861, 'loss': 1.4913450776338577, 'td_loss': 0.003896329506915208, 'imitator_loss': 1.4874487479925156, 'time_step': 0.009990106344223022}[0m [36mstep[0m=[35m24000[0m



Epoch 25/30: 100%|██████████| 1000/1000 [00:10<00:00, 98.36it/s, loss=1.54, td_loss=0.0514, imitator_loss=1.49]

[2m2024-08-01 16:30.44[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801162627: epoch=25 step=25000[0m [36mepoch[0m=[35m25[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008541481494903564, 'time_algorithm_update': 0.00901074481010437, 'loss': 1.5422840769290924, 'td_loss': 0.05182307174781454, 'imitator_loss': 1.4904610041379929, 'time_step': 0.010039377212524413}[0m [36mstep[0m=[35m25000[0m



Epoch 26/30: 100%|██████████| 1000/1000 [00:10<00:00, 98.61it/s, loss=1.54, td_loss=0.0541, imitator_loss=1.49]


[2m2024-08-01 16:30.54[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801162627: epoch=26 step=26000[0m [36mepoch[0m=[35m26[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008270766735076904, 'time_algorithm_update': 0.009018783569335937, 'loss': 1.5422691609859467, 'td_loss': 0.0540085573202814, 'imitator_loss': 1.4882606027126313, 'time_step': 0.010013685703277589}[0m [36mstep[0m=[35m26000[0m


Epoch 27/30: 100%|██████████| 1000/1000 [00:10<00:00, 97.34it/s, loss=1.56, td_loss=0.0708, imitator_loss=1.49]

[2m2024-08-01 16:31.04[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801162627: epoch=27 step=27000[0m [36mepoch[0m=[35m27[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008316564559936523, 'time_algorithm_update': 0.009133842468261719, 'loss': 1.559237928390503, 'td_loss': 0.07092303596663987, 'imitator_loss': 1.4883148914575577, 'time_step': 0.010142608404159545}[0m [36mstep[0m=[35m27000[0m



Epoch 28/30: 100%|██████████| 1000/1000 [00:10<00:00, 96.42it/s, loss=1.56, td_loss=0.0734, imitator_loss=1.49]

[2m2024-08-01 16:31.15[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801162627: epoch=28 step=28000[0m [36mepoch[0m=[35m28[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008525598049163818, 'time_algorithm_update': 0.009219484329223633, 'loss': 1.5599050650596618, 'td_loss': 0.07316365567501634, 'imitator_loss': 1.4867414098978042, 'time_step': 0.010238992929458618}[0m [36mstep[0m=[35m28000[0m



Epoch 29/30: 100%|██████████| 1000/1000 [00:09<00:00, 101.09it/s, loss=1.56, td_loss=0.0754, imitator_loss=1.49]


[2m2024-08-01 16:31.25[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801162627: epoch=29 step=29000[0m [36mepoch[0m=[35m29[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008258893489837646, 'time_algorithm_update': 0.008769550800323486, 'loss': 1.5650297237634658, 'td_loss': 0.07600163119100034, 'imitator_loss': 1.489028092265129, 'time_step': 0.009768571615219115}[0m [36mstep[0m=[35m29000[0m


Epoch 30/30: 100%|██████████| 1000/1000 [00:10<00:00, 97.44it/s, loss=1.56, td_loss=0.0721, imitator_loss=1.49]


[2m2024-08-01 16:31.35[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801162627: epoch=30 step=30000[0m [36mepoch[0m=[35m30[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008396563529968261, 'time_algorithm_update': 0.009105504512786865, 'loss': 1.560383647441864, 'td_loss': 0.07198413092829287, 'imitator_loss': 1.488399516940117, 'time_step': 0.010131025552749634}[0m [36mstep[0m=[35m30000[0m
100
[2m2024-08-01 16:31.35[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('uint8')], shape=[(3, 7, 7)]), action_signature=Signature(dtype=[dtype('int64')], shape=[(1,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.DISCRETE: 2>, action_size=3)[0m
[2m2024-08-01 16:31.35[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/DiscreteBCQ_20240801163135[0m
[2m2024-08-01 16:31.35[0m [[32m[1mdebug    [0m] [1mBu

Epoch 1/30: 100%|██████████| 1000/1000 [00:10<00:00, 98.16it/s, loss=1.55, td_loss=0.01, imitator_loss=1.54]  

[2m2024-08-01 16:31.46[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163135: epoch=1 step=1000[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008559746742248535, 'time_algorithm_update': 0.009014591693878174, 'loss': 1.5510892349481582, 'td_loss': 0.0099456866265391, 'imitator_loss': 1.5411435478925706, 'time_step': 0.010059988021850586}[0m [36mstep[0m=[35m1000[0m



Epoch 2/30: 100%|██████████| 1000/1000 [00:10<00:00, 99.67it/s, loss=1.5, td_loss=0.000306, imitator_loss=1.5] 

[2m2024-08-01 16:31.56[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163135: epoch=2 step=2000[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000844386339187622, 'time_algorithm_update': 0.008906677007675171, 'loss': 1.4986647948026657, 'td_loss': 0.0003064732845814433, 'imitator_loss': 1.4983583219051362, 'time_step': 0.0099282968044281}[0m [36mstep[0m=[35m2000[0m



Epoch 3/30: 100%|██████████| 1000/1000 [00:10<00:00, 99.12it/s, loss=1.5, td_loss=0.000238, imitator_loss=1.5] 

[2m2024-08-01 16:32.06[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163135: epoch=3 step=3000[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008398966789245606, 'time_algorithm_update': 0.00894817042350769, 'loss': 1.4988243036270141, 'td_loss': 0.00023732947236931068, 'imitator_loss': 1.4985869759321213, 'time_step': 0.00995844554901123}[0m [36mstep[0m=[35m3000[0m



Epoch 4/30: 100%|██████████| 1000/1000 [00:09<00:00, 100.54it/s, loss=1.5, td_loss=0.000169, imitator_loss=1.5]

[2m2024-08-01 16:32.16[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163135: epoch=4 step=4000[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008271663188934327, 'time_algorithm_update': 0.008806742429733276, 'loss': 1.4952034561634064, 'td_loss': 0.00016852883924002528, 'imitator_loss': 1.4950349266529084, 'time_step': 0.00981663990020752}[0m [36mstep[0m=[35m4000[0m



Epoch 5/30: 100%|██████████| 1000/1000 [00:10<00:00, 95.94it/s, loss=1.5, td_loss=0.000155, imitator_loss=1.5]

[2m2024-08-01 16:32.26[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163135: epoch=5 step=5000[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008707656860351562, 'time_algorithm_update': 0.009232174158096313, 'loss': 1.498792492032051, 'td_loss': 0.00015513036112497502, 'imitator_loss': 1.4986373598575593, 'time_step': 0.010290061473846436}[0m [36mstep[0m=[35m5000[0m



Epoch 6/30: 100%|██████████| 1000/1000 [00:10<00:00, 98.32it/s, loss=1.5, td_loss=0.000113, imitator_loss=1.5]  


[2m2024-08-01 16:32.37[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163135: epoch=6 step=6000[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008481497764587403, 'time_algorithm_update': 0.009016885757446289, 'loss': 1.4958499975204467, 'td_loss': 0.00011239309161464917, 'imitator_loss': 1.4957376059293748, 'time_step': 0.010043734312057495}[0m [36mstep[0m=[35m6000[0m


Epoch 7/30: 100%|██████████| 1000/1000 [00:10<00:00, 99.33it/s, loss=1.5, td_loss=0.00011, imitator_loss=1.5] 

[2m2024-08-01 16:32.47[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163135: epoch=7 step=7000[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008433709144592285, 'time_algorithm_update': 0.008918842315673829, 'loss': 1.4969542472362518, 'td_loss': 0.00011005851076879481, 'imitator_loss': 1.496844187617302, 'time_step': 0.009935308694839478}[0m [36mstep[0m=[35m7000[0m



Epoch 8/30: 100%|██████████| 1000/1000 [00:10<00:00, 98.55it/s, loss=1.5, td_loss=0.000167, imitator_loss=1.5]  

[2m2024-08-01 16:32.57[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163135: epoch=8 step=8000[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008661892414093018, 'time_algorithm_update': 0.00897369647026062, 'loss': 1.495949665427208, 'td_loss': 0.00016640808088959602, 'imitator_loss': 1.4957832572460175, 'time_step': 0.01002263355255127}[0m [36mstep[0m=[35m8000[0m



Epoch 9/30: 100%|██████████| 1000/1000 [00:10<00:00, 95.75it/s, loss=1.5, td_loss=0.005, imitator_loss=1.5]   

[2m2024-08-01 16:33.07[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163135: epoch=9 step=9000[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008794593811035156, 'time_algorithm_update': 0.009272913932800293, 'loss': 1.5003261448144913, 'td_loss': 0.004963777455199306, 'imitator_loss': 1.4953623687028885, 'time_step': 0.010312708377838134}[0m [36mstep[0m=[35m9000[0m



Epoch 10/30: 100%|██████████| 1000/1000 [00:10<00:00, 97.96it/s, loss=1.5, td_loss=0.000231, imitator_loss=1.5]


[2m2024-08-01 16:33.17[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163135: epoch=10 step=10000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008541903495788574, 'time_algorithm_update': 0.00904405665397644, 'loss': 1.4965744825601577, 'td_loss': 0.00023016615052256383, 'imitator_loss': 1.4963443180322646, 'time_step': 0.010081412315368653}[0m [36mstep[0m=[35m10000[0m


Epoch 11/30: 100%|██████████| 1000/1000 [00:10<00:00, 99.14it/s, loss=1.5, td_loss=0.000168, imitator_loss=1.5] 

[2m2024-08-01 16:33.28[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163135: epoch=11 step=11000[0m [36mepoch[0m=[35m11[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008464956283569336, 'time_algorithm_update': 0.008938719749450683, 'loss': 1.495732880115509, 'td_loss': 0.00016730040932998235, 'imitator_loss': 1.4955655794143676, 'time_step': 0.009952442646026611}[0m [36mstep[0m=[35m11000[0m



Epoch 12/30: 100%|██████████| 1000/1000 [00:10<00:00, 99.70it/s, loss=1.5, td_loss=0.000147, imitator_loss=1.5]  

[2m2024-08-01 16:33.38[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163135: epoch=12 step=12000[0m [36mepoch[0m=[35m12[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008416392803192138, 'time_algorithm_update': 0.008895828247070312, 'loss': 1.497073802471161, 'td_loss': 0.00014675670889300818, 'imitator_loss': 1.4969270466566085, 'time_step': 0.009903306722640991}[0m [36mstep[0m=[35m12000[0m



Epoch 13/30: 100%|██████████| 1000/1000 [00:10<00:00, 99.35it/s, loss=1.5, td_loss=0.000132, imitator_loss=1.5] 

[2m2024-08-01 16:33.48[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163135: epoch=13 step=13000[0m [36mepoch[0m=[35m13[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008466124534606933, 'time_algorithm_update': 0.008921640157699586, 'loss': 1.4961323845386505, 'td_loss': 0.0001322998206678676, 'imitator_loss': 1.4960000849962234, 'time_step': 0.009937697887420655}[0m [36mstep[0m=[35m13000[0m



Epoch 14/30: 100%|██████████| 1000/1000 [00:10<00:00, 98.96it/s, loss=1.5, td_loss=0.00014, imitator_loss=1.5] 

[2m2024-08-01 16:33.58[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163135: epoch=14 step=14000[0m [36mepoch[0m=[35m14[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008514931201934814, 'time_algorithm_update': 0.008937718391418457, 'loss': 1.4965985059738158, 'td_loss': 0.0001397688410925184, 'imitator_loss': 1.4964587385654449, 'time_step': 0.00997132444381714}[0m [36mstep[0m=[35m14000[0m



Epoch 15/30: 100%|██████████| 1000/1000 [00:10<00:00, 96.71it/s, loss=1.5, td_loss=0.000132, imitator_loss=1.5] 

[2m2024-08-01 16:34.08[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163135: epoch=15 step=15000[0m [36mepoch[0m=[35m15[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008735215663909912, 'time_algorithm_update': 0.009146843910217285, 'loss': 1.4975479331016541, 'td_loss': 0.00013231224376613682, 'imitator_loss': 1.4974156194925308, 'time_step': 0.010208808183670044}[0m [36mstep[0m=[35m15000[0m



Epoch 16/30: 100%|██████████| 1000/1000 [00:09<00:00, 100.13it/s, loss=1.5, td_loss=0.000117, imitator_loss=1.5] 

[2m2024-08-01 16:34.18[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163135: epoch=16 step=16000[0m [36mepoch[0m=[35m16[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008384442329406738, 'time_algorithm_update': 0.008840518474578857, 'loss': 1.495370098233223, 'td_loss': 0.00011731839070671412, 'imitator_loss': 1.4952527800798416, 'time_step': 0.009867172718048095}[0m [36mstep[0m=[35m16000[0m



Epoch 17/30: 100%|██████████| 1000/1000 [00:10<00:00, 97.75it/s, loss=1.51, td_loss=0.0115, imitator_loss=1.5]

[2m2024-08-01 16:34.28[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163135: epoch=17 step=17000[0m [36mepoch[0m=[35m17[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008697142601013184, 'time_algorithm_update': 0.009056377172470093, 'loss': 1.5078797857761383, 'td_loss': 0.011476307366261608, 'imitator_loss': 1.49640347969532, 'time_step': 0.010102533102035523}[0m [36mstep[0m=[35m17000[0m



Epoch 18/30: 100%|██████████| 1000/1000 [00:10<00:00, 98.82it/s, loss=1.51, td_loss=0.0129, imitator_loss=1.5] 


[2m2024-08-01 16:34.39[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163135: epoch=18 step=18000[0m [36mepoch[0m=[35m18[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008521718978881836, 'time_algorithm_update': 0.008987327575683594, 'loss': 1.5087006468772888, 'td_loss': 0.012756128019027528, 'imitator_loss': 1.495944518327713, 'time_step': 0.010002463340759278}[0m [36mstep[0m=[35m18000[0m


Epoch 19/30: 100%|██████████| 1000/1000 [00:10<00:00, 99.91it/s, loss=1.51, td_loss=0.0152, imitator_loss=1.5]

[2m2024-08-01 16:34.49[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163135: epoch=19 step=19000[0m [36mepoch[0m=[35m19[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008391942977905273, 'time_algorithm_update': 0.008879130840301513, 'loss': 1.511158264875412, 'td_loss': 0.01506462080982601, 'imitator_loss': 1.496093642950058, 'time_step': 0.00989460039138794}[0m [36mstep[0m=[35m19000[0m



Epoch 20/30: 100%|██████████| 1000/1000 [00:10<00:00, 98.37it/s, loss=1.51, td_loss=0.0122, imitator_loss=1.5] 


[2m2024-08-01 16:34.59[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163135: epoch=20 step=20000[0m [36mepoch[0m=[35m20[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008445084095001221, 'time_algorithm_update': 0.00898389458656311, 'loss': 1.507450639128685, 'td_loss': 0.012153151793565485, 'imitator_loss': 1.495297487974167, 'time_step': 0.01003231716156006}[0m [36mstep[0m=[35m20000[0m


Epoch 21/30: 100%|██████████| 1000/1000 [00:10<00:00, 99.49it/s, loss=1.5, td_loss=0.00946, imitator_loss=1.5] 

[2m2024-08-01 16:35.09[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163135: epoch=21 step=21000[0m [36mepoch[0m=[35m21[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008446073532104492, 'time_algorithm_update': 0.008911803960800171, 'loss': 1.5045221967697144, 'td_loss': 0.009398188159626442, 'imitator_loss': 1.4951240077018737, 'time_step': 0.009915708303451539}[0m [36mstep[0m=[35m21000[0m



Epoch 22/30: 100%|██████████| 1000/1000 [00:10<00:00, 96.27it/s, loss=1.51, td_loss=0.00917, imitator_loss=1.5]

[2m2024-08-01 16:35.19[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163135: epoch=22 step=22000[0m [36mepoch[0m=[35m22[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008605632781982422, 'time_algorithm_update': 0.009218195676803589, 'loss': 1.5056445192098618, 'td_loss': 0.009112776236834179, 'imitator_loss': 1.4965317414999009, 'time_step': 0.01025670313835144}[0m [36mstep[0m=[35m22000[0m



Epoch 23/30: 100%|██████████| 1000/1000 [00:09<00:00, 100.45it/s, loss=1.5, td_loss=0.005, imitator_loss=1.5]   

[2m2024-08-01 16:35.29[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163135: epoch=23 step=23000[0m [36mepoch[0m=[35m23[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008444175720214844, 'time_algorithm_update': 0.00880420207977295, 'loss': 1.5003093543052672, 'td_loss': 0.004974327549582085, 'imitator_loss': 1.4953350259065628, 'time_step': 0.009828293561935425}[0m [36mstep[0m=[35m23000[0m



Epoch 24/30: 100%|██████████| 1000/1000 [00:10<00:00, 97.54it/s, loss=1.5, td_loss=0.00589, imitator_loss=1.49]

[2m2024-08-01 16:35.39[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163135: epoch=24 step=24000[0m [36mepoch[0m=[35m24[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008816053867340088, 'time_algorithm_update': 0.009053196430206299, 'loss': 1.4999131189584731, 'td_loss': 0.005855372213482042, 'imitator_loss': 1.4940577446222305, 'time_step': 0.010122660636901856}[0m [36mstep[0m=[35m24000[0m



Epoch 25/30: 100%|██████████| 1000/1000 [00:10<00:00, 99.78it/s, loss=1.51, td_loss=0.0121, imitator_loss=1.5]

[2m2024-08-01 16:35.50[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163135: epoch=25 step=25000[0m [36mepoch[0m=[35m25[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008420491218566894, 'time_algorithm_update': 0.008860039472579956, 'loss': 1.509305931687355, 'td_loss': 0.012124295654008166, 'imitator_loss': 1.4971816369295121, 'time_step': 0.009890535831451415}[0m [36mstep[0m=[35m25000[0m



Epoch 26/30: 100%|██████████| 1000/1000 [00:10<00:00, 98.99it/s, loss=1.51, td_loss=0.0123, imitator_loss=1.5] 

[2m2024-08-01 16:36.00[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163135: epoch=26 step=26000[0m [36mepoch[0m=[35m26[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008500902652740478, 'time_algorithm_update': 0.008962918996810914, 'loss': 1.5078421039581298, 'td_loss': 0.012273302589805099, 'imitator_loss': 1.495568801522255, 'time_step': 0.00999048686027527}[0m [36mstep[0m=[35m26000[0m



Epoch 27/30: 100%|██████████| 1000/1000 [00:09<00:00, 100.12it/s, loss=1.5, td_loss=0.0093, imitator_loss=1.5] 

[2m2024-08-01 16:36.10[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163135: epoch=27 step=27000[0m [36mepoch[0m=[35m27[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008428726196289063, 'time_algorithm_update': 0.008834853410720826, 'loss': 1.504538679599762, 'td_loss': 0.009252775675820885, 'imitator_loss': 1.4952859063148498, 'time_step': 0.009847035884857178}[0m [36mstep[0m=[35m27000[0m



Epoch 28/30: 100%|██████████| 1000/1000 [00:10<00:00, 97.97it/s, loss=1.5, td_loss=0.00665, imitator_loss=1.5] 


[2m2024-08-01 16:36.20[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163135: epoch=28 step=28000[0m [36mepoch[0m=[35m28[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008564016819000244, 'time_algorithm_update': 0.009054543495178223, 'loss': 1.502658071756363, 'td_loss': 0.0066594281207508176, 'imitator_loss': 1.4959986435174941, 'time_step': 0.01008931040763855}[0m [36mstep[0m=[35m28000[0m


Epoch 29/30: 100%|██████████| 1000/1000 [00:10<00:00, 98.69it/s, loss=1.5, td_loss=0.00716, imitator_loss=1.5] 

[2m2024-08-01 16:36.30[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163135: epoch=29 step=29000[0m [36mepoch[0m=[35m29[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008523895740509033, 'time_algorithm_update': 0.008988976240158082, 'loss': 1.5022184796333313, 'td_loss': 0.0071839550944132495, 'imitator_loss': 1.495034523844719, 'time_step': 0.010015220165252685}[0m [36mstep[0m=[35m29000[0m



Epoch 30/30: 100%|██████████| 1000/1000 [00:09<00:00, 101.46it/s, loss=1.5, td_loss=0.00805, imitator_loss=1.5]

[2m2024-08-01 16:36.40[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163135: epoch=30 step=30000[0m [36mepoch[0m=[35m30[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008289251327514649, 'time_algorithm_update': 0.008732074975967407, 'loss': 1.5038727941513061, 'td_loss': 0.00801441871501811, 'imitator_loss': 1.4958583753108978, 'time_step': 0.009735137224197388}[0m [36mstep[0m=[35m30000[0m





200
[2m2024-08-01 16:36.40[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('uint8')], shape=[(3, 7, 7)]), action_signature=Signature(dtype=[dtype('int64')], shape=[(1,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.DISCRETE: 2>, action_size=3)[0m
[2m2024-08-01 16:36.40[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/DiscreteBCQ_20240801163640[0m
[2m2024-08-01 16:36.40[0m [[32m[1mdebug    [0m] [1mBuilding models...            [0m
[2m2024-08-01 16:36.41[0m [[32m[1mdebug    [0m] [1mModels have been built.       [0m
[2m2024-08-01 16:36.41[0m [[32m[1minfo     [0m] [1mParameters                    [0m [36mparams[0m=[35m{'observation_shape': [3, 7, 7], 'action_size': 3, 'config': {'type': 'discrete_bcq', 'params': {'batch_size': 32, 'gamma': 0.99, 'observation_scaler': {'type': 'none', 'param

Epoch 1/30: 100%|██████████| 1000/1000 [00:10<00:00, 99.31it/s, loss=1.58, td_loss=0.0158, imitator_loss=1.56]

[2m2024-08-01 16:36.51[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163640: epoch=1 step=1000[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000845623254776001, 'time_algorithm_update': 0.008892897844314575, 'loss': 1.574894294500351, 'td_loss': 0.01571109743944544, 'imitator_loss': 1.5591831988096236, 'time_step': 0.009933113336563111}[0m [36mstep[0m=[35m1000[0m



Epoch 2/30: 100%|██████████| 1000/1000 [00:10<00:00, 99.28it/s, loss=1.51, td_loss=0.000338, imitator_loss=1.5] 

[2m2024-08-01 16:37.01[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163640: epoch=2 step=2000[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008430073261260987, 'time_algorithm_update': 0.008924240112304688, 'loss': 1.5050987561941147, 'td_loss': 0.00033616887696007327, 'imitator_loss': 1.5047625885009766, 'time_step': 0.009950406312942505}[0m [36mstep[0m=[35m2000[0m



Epoch 3/30: 100%|██████████| 1000/1000 [00:10<00:00, 98.18it/s, loss=1.5, td_loss=0.000156, imitator_loss=1.5]


[2m2024-08-01 16:37.11[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163640: epoch=3 step=3000[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008628170490264892, 'time_algorithm_update': 0.009011534452438354, 'loss': 1.5016456446647644, 'td_loss': 0.00015519459852475847, 'imitator_loss': 1.5014904516935348, 'time_step': 0.01004180121421814}[0m [36mstep[0m=[35m3000[0m


Epoch 4/30: 100%|██████████| 1000/1000 [00:10<00:00, 99.83it/s, loss=1.5, td_loss=9.83e-5, imitator_loss=1.5] 

[2m2024-08-01 16:37.21[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163640: epoch=4 step=4000[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008517136573791504, 'time_algorithm_update': 0.00882715106010437, 'loss': 1.5004035201072692, 'td_loss': 9.79263237431951e-05, 'imitator_loss': 1.5003055924177169, 'time_step': 0.009872549533843995}[0m [36mstep[0m=[35m4000[0m



Epoch 5/30: 100%|██████████| 1000/1000 [00:10<00:00, 91.67it/s, loss=1.5, td_loss=7.02e-5, imitator_loss=1.5] 

[2m2024-08-01 16:37.32[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163640: epoch=5 step=5000[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008919460773468017, 'time_algorithm_update': 0.00961765193939209, 'loss': 1.4987376710176468, 'td_loss': 7.021715798327933e-05, 'imitator_loss': 1.4986674540042877, 'time_step': 0.010726210355758667}[0m [36mstep[0m=[35m5000[0m



Epoch 6/30: 100%|██████████| 1000/1000 [00:10<00:00, 93.18it/s, loss=1.5, td_loss=7.2e-5, imitator_loss=1.5] 

[2m2024-08-01 16:37.43[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163640: epoch=6 step=6000[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008887512683868409, 'time_algorithm_update': 0.009515345096588135, 'loss': 1.498036575436592, 'td_loss': 7.2206243918572e-05, 'imitator_loss': 1.497964367747307, 'time_step': 0.010580309629440308}[0m [36mstep[0m=[35m6000[0m



Epoch 7/30: 100%|██████████| 1000/1000 [00:10<00:00, 97.82it/s, loss=1.5, td_loss=7.7e-5, imitator_loss=1.5] 

[2m2024-08-01 16:37.53[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163640: epoch=7 step=7000[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008821537494659424, 'time_algorithm_update': 0.009046693801879883, 'loss': 1.49823845911026, 'td_loss': 7.694307561177994e-05, 'imitator_loss': 1.4981615171432494, 'time_step': 0.010107144832611084}[0m [36mstep[0m=[35m7000[0m



Epoch 8/30: 100%|██████████| 1000/1000 [00:10<00:00, 95.65it/s, loss=1.5, td_loss=8.77e-5, imitator_loss=1.5] 


[2m2024-08-01 16:38.03[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163640: epoch=8 step=8000[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008782663345336914, 'time_algorithm_update': 0.009198390007019042, 'loss': 1.4977468521595, 'td_loss': 8.736876583498087e-05, 'imitator_loss': 1.4976594820022584, 'time_step': 0.010289260864257813}[0m [36mstep[0m=[35m8000[0m


Epoch 9/30: 100%|██████████| 1000/1000 [00:10<00:00, 95.15it/s, loss=1.51, td_loss=0.00796, imitator_loss=1.5]


[2m2024-08-01 16:38.14[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163640: epoch=9 step=9000[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008862829208374024, 'time_algorithm_update': 0.009285593509674072, 'loss': 1.5055840542316437, 'td_loss': 0.007891832884517499, 'imitator_loss': 1.4976922198534013, 'time_step': 0.010363944292068482}[0m [36mstep[0m=[35m9000[0m


Epoch 10/30: 100%|██████████| 1000/1000 [00:10<00:00, 93.24it/s, loss=1.5, td_loss=0.000382, imitator_loss=1.5]

[2m2024-08-01 16:38.25[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163640: epoch=10 step=10000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008847284317016602, 'time_algorithm_update': 0.00944911527633667, 'loss': 1.500057629942894, 'td_loss': 0.0003805865365793579, 'imitator_loss': 1.4996770423650743, 'time_step': 0.010547663927078246}[0m [36mstep[0m=[35m10000[0m



Epoch 11/30: 100%|██████████| 1000/1000 [00:10<00:00, 98.93it/s, loss=1.5, td_loss=0.000234, imitator_loss=1.5] 


[2m2024-08-01 16:38.35[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163640: epoch=11 step=11000[0m [36mepoch[0m=[35m11[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008607144355773926, 'time_algorithm_update': 0.008954490661621094, 'loss': 1.4976536017656326, 'td_loss': 0.00023407145071178091, 'imitator_loss': 1.4974195306301117, 'time_step': 0.009984809160232543}[0m [36mstep[0m=[35m11000[0m


Epoch 12/30: 100%|██████████| 1000/1000 [00:10<00:00, 97.21it/s, loss=1.5, td_loss=0.000168, imitator_loss=1.5]

[2m2024-08-01 16:38.45[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163640: epoch=12 step=12000[0m [36mepoch[0m=[35m12[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008698475360870362, 'time_algorithm_update': 0.00909567904472351, 'loss': 1.4999192811250686, 'td_loss': 0.0001680057186040358, 'imitator_loss': 1.4997512738704681, 'time_step': 0.01016375756263733}[0m [36mstep[0m=[35m12000[0m



Epoch 13/30: 100%|██████████| 1000/1000 [00:10<00:00, 99.50it/s, loss=1.5, td_loss=0.000143, imitator_loss=1.5]

[2m2024-08-01 16:38.55[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163640: epoch=13 step=13000[0m [36mepoch[0m=[35m13[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008495287895202636, 'time_algorithm_update': 0.008878063917160034, 'loss': 1.4984951496124268, 'td_loss': 0.00014312025799063122, 'imitator_loss': 1.4983520276546478, 'time_step': 0.009916607856750487}[0m [36mstep[0m=[35m13000[0m



Epoch 14/30: 100%|██████████| 1000/1000 [00:10<00:00, 99.05it/s, loss=1.5, td_loss=0.000128, imitator_loss=1.5]


[2m2024-08-01 16:39.05[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163640: epoch=14 step=14000[0m [36mepoch[0m=[35m14[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008587620258331299, 'time_algorithm_update': 0.008926231384277344, 'loss': 1.4985271310806274, 'td_loss': 0.00012820403284240456, 'imitator_loss': 1.4983989255428314, 'time_step': 0.009962586164474487}[0m [36mstep[0m=[35m14000[0m


Epoch 15/30: 100%|██████████| 1000/1000 [00:10<00:00, 98.49it/s, loss=1.5, td_loss=0.000115, imitator_loss=1.5] 


[2m2024-08-01 16:39.15[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163640: epoch=15 step=15000[0m [36mepoch[0m=[35m15[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008584177494049073, 'time_algorithm_update': 0.008944749116897584, 'loss': 1.500049279332161, 'td_loss': 0.00011470316258601087, 'imitator_loss': 1.4999345752000808, 'time_step': 0.009996648311614991}[0m [36mstep[0m=[35m15000[0m


Epoch 16/30: 100%|██████████| 1000/1000 [00:09<00:00, 101.19it/s, loss=1.5, td_loss=0.00011, imitator_loss=1.5] 

[2m2024-08-01 16:39.25[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163640: epoch=16 step=16000[0m [36mepoch[0m=[35m16[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008467085361480713, 'time_algorithm_update': 0.008716484308242798, 'loss': 1.5003132323026658, 'td_loss': 0.00011034194389139884, 'imitator_loss': 1.5002028889656067, 'time_step': 0.009756389617919923}[0m [36mstep[0m=[35m16000[0m



Epoch 17/30: 100%|██████████| 1000/1000 [00:10<00:00, 98.67it/s, loss=1.51, td_loss=0.00983, imitator_loss=1.5]


[2m2024-08-01 16:39.35[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163640: epoch=17 step=17000[0m [36mepoch[0m=[35m17[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000853712797164917, 'time_algorithm_update': 0.008957836151123047, 'loss': 1.508903741002083, 'td_loss': 0.009791818151617917, 'imitator_loss': 1.4991119229793548, 'time_step': 0.009987444639205933}[0m [36mstep[0m=[35m17000[0m


Epoch 18/30: 100%|██████████| 1000/1000 [00:10<00:00, 98.51it/s, loss=1.51, td_loss=0.0146, imitator_loss=1.5] 

[2m2024-08-01 16:39.46[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163640: epoch=18 step=18000[0m [36mepoch[0m=[35m18[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008546879291534424, 'time_algorithm_update': 0.009016682147979736, 'loss': 1.5119660820960998, 'td_loss': 0.014484769884351408, 'imitator_loss': 1.4974813129901885, 'time_step': 0.01003283166885376}[0m [36mstep[0m=[35m18000[0m



Epoch 19/30: 100%|██████████| 1000/1000 [00:09<00:00, 104.03it/s, loss=1.51, td_loss=0.0173, imitator_loss=1.5]

[2m2024-08-01 16:39.55[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163640: epoch=19 step=19000[0m [36mepoch[0m=[35m19[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008248274326324463, 'time_algorithm_update': 0.008492629051208496, 'loss': 1.5145140463113784, 'td_loss': 0.01733188042137772, 'imitator_loss': 1.4971821670532226, 'time_step': 0.009487157821655273}[0m [36mstep[0m=[35m19000[0m



Epoch 20/30: 100%|██████████| 1000/1000 [00:10<00:00, 96.70it/s, loss=1.52, td_loss=0.0206, imitator_loss=1.5]


[2m2024-08-01 16:40.06[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163640: epoch=20 step=20000[0m [36mepoch[0m=[35m20[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008770575523376465, 'time_algorithm_update': 0.00913898229598999, 'loss': 1.5175418995618821, 'td_loss': 0.020686145580199083, 'imitator_loss': 1.4968557549715042, 'time_step': 0.01021372127532959}[0m [36mstep[0m=[35m20000[0m


Epoch 21/30: 100%|██████████| 1000/1000 [00:10<00:00, 97.04it/s, loss=1.52, td_loss=0.0205, imitator_loss=1.5] 

[2m2024-08-01 16:40.16[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163640: epoch=21 step=21000[0m [36mepoch[0m=[35m21[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008690855503082275, 'time_algorithm_update': 0.009127042293548583, 'loss': 1.5176117864847183, 'td_loss': 0.02071912652492756, 'imitator_loss': 1.4968926599025727, 'time_step': 0.010176422119140625}[0m [36mstep[0m=[35m21000[0m



Epoch 22/30: 100%|██████████| 1000/1000 [00:10<00:00, 98.82it/s, loss=1.52, td_loss=0.022, imitator_loss=1.5] 

[2m2024-08-01 16:40.26[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163640: epoch=22 step=22000[0m [36mepoch[0m=[35m22[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008459539413452149, 'time_algorithm_update': 0.008974098682403564, 'loss': 1.5207984812259674, 'td_loss': 0.022000920916791074, 'imitator_loss': 1.498797560095787, 'time_step': 0.00999716806411743}[0m [36mstep[0m=[35m22000[0m



Epoch 23/30: 100%|██████████| 1000/1000 [00:10<00:00, 97.16it/s, loss=1.52, td_loss=0.0206, imitator_loss=1.5]


[2m2024-08-01 16:40.36[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163640: epoch=23 step=23000[0m [36mepoch[0m=[35m23[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000873704195022583, 'time_algorithm_update': 0.009108993291854859, 'loss': 1.5198486691713333, 'td_loss': 0.020764742396655492, 'imitator_loss': 1.4990839275121688, 'time_step': 0.010162557840347291}[0m [36mstep[0m=[35m23000[0m


Epoch 24/30: 100%|██████████| 1000/1000 [00:10<00:00, 99.60it/s, loss=1.52, td_loss=0.0223, imitator_loss=1.5]

[2m2024-08-01 16:40.46[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163640: epoch=24 step=24000[0m [36mepoch[0m=[35m24[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008560621738433838, 'time_algorithm_update': 0.00887489342689514, 'loss': 1.5194850419759751, 'td_loss': 0.022182540669920855, 'imitator_loss': 1.4973024998903275, 'time_step': 0.009906848907470704}[0m [36mstep[0m=[35m24000[0m



Epoch 25/30: 100%|██████████| 1000/1000 [00:10<00:00, 98.78it/s, loss=1.52, td_loss=0.0168, imitator_loss=1.5]


[2m2024-08-01 16:40.57[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163640: epoch=25 step=25000[0m [36mepoch[0m=[35m25[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008478882312774658, 'time_algorithm_update': 0.008939788103103637, 'loss': 1.5161890211105347, 'td_loss': 0.01672342734967242, 'imitator_loss': 1.499465593934059, 'time_step': 0.009982693672180175}[0m [36mstep[0m=[35m25000[0m


Epoch 26/30: 100%|██████████| 1000/1000 [00:09<00:00, 100.13it/s, loss=1.51, td_loss=0.0133, imitator_loss=1.5]


[2m2024-08-01 16:41.07[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163640: epoch=26 step=26000[0m [36mepoch[0m=[35m26[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008365795612335205, 'time_algorithm_update': 0.008842553615570068, 'loss': 1.5118623738288879, 'td_loss': 0.013307520607166225, 'imitator_loss': 1.4985548557043076, 'time_step': 0.009863455295562745}[0m [36mstep[0m=[35m26000[0m


Epoch 27/30: 100%|██████████| 1000/1000 [00:10<00:00, 98.56it/s, loss=1.51, td_loss=0.0151, imitator_loss=1.5]


[2m2024-08-01 16:41.17[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163640: epoch=27 step=27000[0m [36mepoch[0m=[35m27[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000847301721572876, 'time_algorithm_update': 0.008984037160873414, 'loss': 1.5135069279670714, 'td_loss': 0.015169088711118092, 'imitator_loss': 1.4983378406763077, 'time_step': 0.010013372898101806}[0m [36mstep[0m=[35m27000[0m


Epoch 28/30: 100%|██████████| 1000/1000 [00:10<00:00, 99.76it/s, loss=1.52, td_loss=0.0201, imitator_loss=1.5]

[2m2024-08-01 16:41.27[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163640: epoch=28 step=28000[0m [36mepoch[0m=[35m28[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008391597270965576, 'time_algorithm_update': 0.008887051343917847, 'loss': 1.5184861146211623, 'td_loss': 0.02010701435568626, 'imitator_loss': 1.4983791023492814, 'time_step': 0.009899519443511962}[0m [36mstep[0m=[35m28000[0m



Epoch 29/30: 100%|██████████| 1000/1000 [00:09<00:00, 100.13it/s, loss=1.51, td_loss=0.0163, imitator_loss=1.5]


[2m2024-08-01 16:41.37[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163640: epoch=29 step=29000[0m [36mepoch[0m=[35m29[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008505623340606689, 'time_algorithm_update': 0.008841118335723878, 'loss': 1.5126737637519836, 'td_loss': 0.01631166021624813, 'imitator_loss': 1.4963621032238006, 'time_step': 0.00986222791671753}[0m [36mstep[0m=[35m29000[0m


Epoch 30/30: 100%|██████████| 1000/1000 [00:10<00:00, 98.64it/s, loss=1.52, td_loss=0.0206, imitator_loss=1.5]

[2m2024-08-01 16:41.47[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801163640: epoch=30 step=30000[0m [36mepoch[0m=[35m30[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000857903242111206, 'time_algorithm_update': 0.008976672410964965, 'loss': 1.5184339821338653, 'td_loss': 0.02053659748105565, 'imitator_loss': 1.497897381901741, 'time_step': 0.010011765718460084}[0m [36mstep[0m=[35m30000[0m





400
[2m2024-08-01 16:41.47[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('uint8')], shape=[(3, 7, 7)]), action_signature=Signature(dtype=[dtype('int64')], shape=[(1,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.DISCRETE: 2>, action_size=3)[0m
[2m2024-08-01 16:41.47[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/DiscreteBCQ_20240801164147[0m
[2m2024-08-01 16:41.47[0m [[32m[1mdebug    [0m] [1mBuilding models...            [0m
[2m2024-08-01 16:41.47[0m [[32m[1mdebug    [0m] [1mModels have been built.       [0m
[2m2024-08-01 16:41.47[0m [[32m[1minfo     [0m] [1mParameters                    [0m [36mparams[0m=[35m{'observation_shape': [3, 7, 7], 'action_size': 3, 'config': {'type': 'discrete_bcq', 'params': {'batch_size': 32, 'gamma': 0.99, 'observation_scaler': {'type': 'none', 'param

Epoch 1/30: 100%|██████████| 1000/1000 [00:09<00:00, 101.44it/s, loss=1.57, td_loss=0.0136, imitator_loss=1.55]


[2m2024-08-01 16:41.57[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801164147: epoch=1 step=1000[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008357124328613281, 'time_algorithm_update': 0.00871450161933899, 'loss': 1.564384092450142, 'td_loss': 0.01348204654494657, 'imitator_loss': 1.5509020458459855, 'time_step': 0.00973845672607422}[0m [36mstep[0m=[35m1000[0m


Epoch 2/30: 100%|██████████| 1000/1000 [00:10<00:00, 99.53it/s, loss=1.5, td_loss=0.00012, imitator_loss=1.5]   


[2m2024-08-01 16:42.07[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801164147: epoch=2 step=2000[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000846836805343628, 'time_algorithm_update': 0.00890225076675415, 'loss': 1.5034635004997254, 'td_loss': 0.00011992527719303326, 'imitator_loss': 1.5033435753583908, 'time_step': 0.009920946598052979}[0m [36mstep[0m=[35m2000[0m


Epoch 3/30: 100%|██████████| 1000/1000 [00:09<00:00, 100.80it/s, loss=1.5, td_loss=0.000103, imitator_loss=1.5]

[2m2024-08-01 16:42.17[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801164147: epoch=3 step=3000[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008416275978088379, 'time_algorithm_update': 0.008755293846130371, 'loss': 1.5024093527793885, 'td_loss': 0.00010420650097603356, 'imitator_loss': 1.5023051450252534, 'time_step': 0.009774176836013794}[0m [36mstep[0m=[35m3000[0m



Epoch 4/30: 100%|██████████| 1000/1000 [00:10<00:00, 93.87it/s, loss=1.5, td_loss=0.000109, imitator_loss=1.5]


[2m2024-08-01 16:42.28[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801164147: epoch=4 step=4000[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008825976848602294, 'time_algorithm_update': 0.009451447248458863, 'loss': 1.5003536401987076, 'td_loss': 0.0001088976600967726, 'imitator_loss': 1.5002447432279586, 'time_step': 0.010513124227523804}[0m [36mstep[0m=[35m4000[0m


Epoch 5/30: 100%|██████████| 1000/1000 [00:09<00:00, 103.06it/s, loss=1.5, td_loss=9.62e-5, imitator_loss=1.5]

[2m2024-08-01 16:42.38[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801164147: epoch=5 step=5000[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008280618190765381, 'time_algorithm_update': 0.008578683137893678, 'loss': 1.4988981845378875, 'td_loss': 9.639885387514368e-05, 'imitator_loss': 1.4988017859458924, 'time_step': 0.009576202869415283}[0m [36mstep[0m=[35m5000[0m



Epoch 6/30: 100%|██████████| 1000/1000 [00:10<00:00, 96.27it/s, loss=1.5, td_loss=9.86e-5, imitator_loss=1.5] 

[2m2024-08-01 16:42.48[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801164147: epoch=6 step=6000[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008781094551086426, 'time_algorithm_update': 0.009209859371185302, 'loss': 1.497326080441475, 'td_loss': 9.829197969338566e-05, 'imitator_loss': 1.4972277907133102, 'time_step': 0.010256990909576416}[0m [36mstep[0m=[35m6000[0m



Epoch 7/30: 100%|██████████| 1000/1000 [00:09<00:00, 104.90it/s, loss=1.5, td_loss=9.37e-5, imitator_loss=1.5] 


[2m2024-08-01 16:42.58[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801164147: epoch=7 step=7000[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008327937126159667, 'time_algorithm_update': 0.008404332876205444, 'loss': 1.5012284957170487, 'td_loss': 9.373173897893139e-05, 'imitator_loss': 1.501134765267372, 'time_step': 0.00941447639465332}[0m [36mstep[0m=[35m7000[0m


Epoch 8/30: 100%|██████████| 1000/1000 [00:09<00:00, 102.23it/s, loss=1.5, td_loss=8.84e-5, imitator_loss=1.5]

[2m2024-08-01 16:43.07[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801164147: epoch=8 step=8000[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008442487716674805, 'time_algorithm_update': 0.008634149074554443, 'loss': 1.4989248238801955, 'td_loss': 8.820371190540754e-05, 'imitator_loss': 1.4988366191387177, 'time_step': 0.009658643007278443}[0m [36mstep[0m=[35m8000[0m



Epoch 9/30: 100%|██████████| 1000/1000 [00:10<00:00, 98.71it/s, loss=1.51, td_loss=0.00656, imitator_loss=1.5]


[2m2024-08-01 16:43.18[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801164147: epoch=9 step=9000[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008529601097106934, 'time_algorithm_update': 0.008944426298141479, 'loss': 1.506950912117958, 'td_loss': 0.006508655574412841, 'imitator_loss': 1.500442255616188, 'time_step': 0.009986181735992432}[0m [36mstep[0m=[35m9000[0m


Epoch 10/30: 100%|██████████| 1000/1000 [00:09<00:00, 101.87it/s, loss=1.5, td_loss=0.000422, imitator_loss=1.5]


[2m2024-08-01 16:43.27[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801164147: epoch=10 step=10000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008193140029907227, 'time_algorithm_update': 0.008710510015487671, 'loss': 1.4992774879932405, 'td_loss': 0.00042012362047535133, 'imitator_loss': 1.4988573646545411, 'time_step': 0.009699864864349365}[0m [36mstep[0m=[35m10000[0m


Epoch 11/30: 100%|██████████| 1000/1000 [00:09<00:00, 102.94it/s, loss=1.5, td_loss=0.000241, imitator_loss=1.5] 

[2m2024-08-01 16:43.37[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801164147: epoch=11 step=11000[0m [36mepoch[0m=[35m11[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008394510746002197, 'time_algorithm_update': 0.008589125156402588, 'loss': 1.4992814342975616, 'td_loss': 0.00024099568578458275, 'imitator_loss': 1.4990404393672943, 'time_step': 0.009592156648635865}[0m [36mstep[0m=[35m11000[0m



Epoch 12/30: 100%|██████████| 1000/1000 [00:09<00:00, 101.94it/s, loss=1.5, td_loss=0.000174, imitator_loss=1.5] 


[2m2024-08-01 16:43.47[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801164147: epoch=12 step=12000[0m [36mepoch[0m=[35m12[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008340787887573242, 'time_algorithm_update': 0.008696790933609009, 'loss': 1.4980210970640182, 'td_loss': 0.00017443211897989386, 'imitator_loss': 1.4978466655015945, 'time_step': 0.009692871570587158}[0m [36mstep[0m=[35m12000[0m


Epoch 13/30: 100%|██████████| 1000/1000 [00:09<00:00, 101.48it/s, loss=1.5, td_loss=0.000155, imitator_loss=1.5]

[2m2024-08-01 16:43.57[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801164147: epoch=13 step=13000[0m [36mepoch[0m=[35m13[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008591396808624267, 'time_algorithm_update': 0.008695534944534301, 'loss': 1.4989387409687043, 'td_loss': 0.00015514653348145657, 'imitator_loss': 1.4987835923433304, 'time_step': 0.009728493690490723}[0m [36mstep[0m=[35m13000[0m



Epoch 14/30: 100%|██████████| 1000/1000 [00:10<00:00, 99.59it/s, loss=1.5, td_loss=0.000139, imitator_loss=1.5] 

[2m2024-08-01 16:44.07[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801164147: epoch=14 step=14000[0m [36mepoch[0m=[35m14[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008567566871643067, 'time_algorithm_update': 0.008891883134841919, 'loss': 1.4996417319774629, 'td_loss': 0.0001392441603111365, 'imitator_loss': 1.4995024874210359, 'time_step': 0.009909608840942382}[0m [36mstep[0m=[35m14000[0m



Epoch 15/30: 100%|██████████| 1000/1000 [00:09<00:00, 101.20it/s, loss=1.5, td_loss=0.000123, imitator_loss=1.5] 

[2m2024-08-01 16:44.17[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801164147: epoch=15 step=15000[0m [36mepoch[0m=[35m15[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008408510684967041, 'time_algorithm_update': 0.008747430801391601, 'loss': 1.4986405811309815, 'td_loss': 0.00012345612019089457, 'imitator_loss': 1.498517124414444, 'time_step': 0.009762514352798462}[0m [36mstep[0m=[35m15000[0m



Epoch 16/30: 100%|██████████| 1000/1000 [00:09<00:00, 100.58it/s, loss=1.5, td_loss=0.000121, imitator_loss=1.5]

[2m2024-08-01 16:44.27[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801164147: epoch=16 step=16000[0m [36mepoch[0m=[35m16[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008420791625976563, 'time_algorithm_update': 0.0087735915184021, 'loss': 1.4987947062253952, 'td_loss': 0.00012065370553682442, 'imitator_loss': 1.4986740529537201, 'time_step': 0.009799086570739746}[0m [36mstep[0m=[35m16000[0m



Epoch 17/30: 100%|██████████| 1000/1000 [00:09<00:00, 102.48it/s, loss=1.51, td_loss=0.0155, imitator_loss=1.5]

[2m2024-08-01 16:44.36[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801164147: epoch=17 step=17000[0m [36mepoch[0m=[35m17[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008401241302490234, 'time_algorithm_update': 0.008615963220596313, 'loss': 1.5142347832918168, 'td_loss': 0.015369823414774147, 'imitator_loss': 1.4988649601936341, 'time_step': 0.009628260374069214}[0m [36mstep[0m=[35m17000[0m



Epoch 18/30: 100%|██████████| 1000/1000 [00:10<00:00, 98.29it/s, loss=1.52, td_loss=0.0165, imitator_loss=1.5]

[2m2024-08-01 16:44.47[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801164147: epoch=18 step=18000[0m [36mepoch[0m=[35m18[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008679416179656983, 'time_algorithm_update': 0.009008301258087157, 'loss': 1.516144588470459, 'td_loss': 0.01682768624325399, 'imitator_loss': 1.4993169022798538, 'time_step': 0.01004549503326416}[0m [36mstep[0m=[35m18000[0m



Epoch 19/30: 100%|██████████| 1000/1000 [00:09<00:00, 103.44it/s, loss=1.52, td_loss=0.0179, imitator_loss=1.5]

[2m2024-08-01 16:44.56[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801164147: epoch=19 step=19000[0m [36mepoch[0m=[35m19[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008234221935272217, 'time_algorithm_update': 0.008566156625747681, 'loss': 1.5180427337884903, 'td_loss': 0.018050546834856504, 'imitator_loss': 1.4999921882152558, 'time_step': 0.00955120849609375}[0m [36mstep[0m=[35m19000[0m



Epoch 20/30: 100%|██████████| 1000/1000 [00:10<00:00, 98.51it/s, loss=1.52, td_loss=0.0164, imitator_loss=1.5]

[2m2024-08-01 16:45.06[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801164147: epoch=20 step=20000[0m [36mepoch[0m=[35m20[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008588836193084716, 'time_algorithm_update': 0.00898847484588623, 'loss': 1.515879926085472, 'td_loss': 0.01634143043908989, 'imitator_loss': 1.4995384939908982, 'time_step': 0.010024882316589356}[0m [36mstep[0m=[35m20000[0m



Epoch 21/30: 100%|██████████| 1000/1000 [00:09<00:00, 100.13it/s, loss=1.51, td_loss=0.0161, imitator_loss=1.5]


[2m2024-08-01 16:45.16[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801164147: epoch=21 step=21000[0m [36mepoch[0m=[35m21[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008484897613525391, 'time_algorithm_update': 0.008849636793136597, 'loss': 1.5144372287988663, 'td_loss': 0.016054844452795806, 'imitator_loss': 1.4983823838233947, 'time_step': 0.009862529516220093}[0m [36mstep[0m=[35m21000[0m


Epoch 22/30: 100%|██████████| 1000/1000 [00:09<00:00, 103.18it/s, loss=1.52, td_loss=0.0171, imitator_loss=1.5]

[2m2024-08-01 16:45.26[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801164147: epoch=22 step=22000[0m [36mepoch[0m=[35m22[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008441672325134277, 'time_algorithm_update': 0.008564783096313476, 'loss': 1.5152990565299989, 'td_loss': 0.0169948369711783, 'imitator_loss': 1.4983042196035385, 'time_step': 0.009574463605880738}[0m [36mstep[0m=[35m22000[0m



Epoch 23/30: 100%|██████████| 1000/1000 [00:10<00:00, 96.88it/s, loss=1.51, td_loss=0.0126, imitator_loss=1.5]


[2m2024-08-01 16:45.37[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801164147: epoch=23 step=23000[0m [36mepoch[0m=[35m23[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008760638236999512, 'time_algorithm_update': 0.009113215684890747, 'loss': 1.5098719497919082, 'td_loss': 0.012494920614364673, 'imitator_loss': 1.4973770289421082, 'time_step': 0.010178860902786255}[0m [36mstep[0m=[35m23000[0m


Epoch 24/30: 100%|██████████| 1000/1000 [00:10<00:00, 97.26it/s, loss=1.51, td_loss=0.0141, imitator_loss=1.5]


[2m2024-08-01 16:45.47[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801164147: epoch=24 step=24000[0m [36mepoch[0m=[35m24[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008586912155151367, 'time_algorithm_update': 0.009107810258865357, 'loss': 1.5109657912254333, 'td_loss': 0.014042800629977138, 'imitator_loss': 1.4969229902029038, 'time_step': 0.010151489734649658}[0m [36mstep[0m=[35m24000[0m


Epoch 25/30: 100%|██████████| 1000/1000 [00:10<00:00, 95.47it/s, loss=1.52, td_loss=0.0174, imitator_loss=1.5]


[2m2024-08-01 16:45.57[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801164147: epoch=25 step=25000[0m [36mepoch[0m=[35m25[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008927052021026611, 'time_algorithm_update': 0.009204805850982666, 'loss': 1.516336207985878, 'td_loss': 0.017508484387770296, 'imitator_loss': 1.498827725291252, 'time_step': 0.01031724739074707}[0m [36mstep[0m=[35m25000[0m


Epoch 26/30: 100%|██████████| 1000/1000 [00:10<00:00, 99.20it/s, loss=1.52, td_loss=0.0178, imitator_loss=1.5]


[2m2024-08-01 16:46.07[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801164147: epoch=26 step=26000[0m [36mepoch[0m=[35m26[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008541522026062012, 'time_algorithm_update': 0.008931430578231812, 'loss': 1.51658032643795, 'td_loss': 0.01781419379793806, 'imitator_loss': 1.498766132235527, 'time_step': 0.009960577726364135}[0m [36mstep[0m=[35m26000[0m


Epoch 27/30: 100%|██████████| 1000/1000 [00:10<00:00, 99.28it/s, loss=1.52, td_loss=0.0205, imitator_loss=1.5]

[2m2024-08-01 16:46.18[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801164147: epoch=27 step=27000[0m [36mepoch[0m=[35m27[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008627805709838867, 'time_algorithm_update': 0.008908955812454223, 'loss': 1.5195484791994094, 'td_loss': 0.02046761322754901, 'imitator_loss': 1.4990808662176132, 'time_step': 0.009948010683059692}[0m [36mstep[0m=[35m27000[0m



Epoch 28/30: 100%|██████████| 1000/1000 [00:10<00:00, 99.78it/s, loss=1.52, td_loss=0.02, imitator_loss=1.5]  


[2m2024-08-01 16:46.28[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801164147: epoch=28 step=28000[0m [36mepoch[0m=[35m28[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008461489677429199, 'time_algorithm_update': 0.00884712815284729, 'loss': 1.5174710183143616, 'td_loss': 0.019863162491528783, 'imitator_loss': 1.4976078544855118, 'time_step': 0.009869166851043701}[0m [36mstep[0m=[35m28000[0m


Epoch 29/30: 100%|██████████| 1000/1000 [00:10<00:00, 98.26it/s, loss=1.52, td_loss=0.0166, imitator_loss=1.5] 

[2m2024-08-01 16:46.38[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801164147: epoch=29 step=29000[0m [36mepoch[0m=[35m29[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000829699993133545, 'time_algorithm_update': 0.009049487113952637, 'loss': 1.5163118407726288, 'td_loss': 0.0167045412712032, 'imitator_loss': 1.4996072989702225, 'time_step': 0.010049678564071655}[0m [36mstep[0m=[35m29000[0m



Epoch 30/30: 100%|██████████| 1000/1000 [00:12<00:00, 82.91it/s, loss=1.51, td_loss=0.0157, imitator_loss=1.5]

[2m2024-08-01 16:46.50[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801164147: epoch=30 step=30000[0m [36mepoch[0m=[35m30[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007825355529785156, 'time_algorithm_update': 0.01096510910987854, 'loss': 1.5145137014389038, 'td_loss': 0.015738273877825122, 'imitator_loss': 1.4987754269838334, 'time_step': 0.011918059825897217}[0m [36mstep[0m=[35m30000[0m





True

### 6x6 World

In [7]:
# Naming pieces for the pickled 6x6 gridworld datasets.
# NOTE(review): get_6x6_dataset_path assigns its own values for these parts,
# so it does not read these module-level names.
base_path = '/vol/bitbucket/phl23'
# Previously the 5x5 prefix, left over from the 5x5 section; this section is 6x6.
dataset_prefix = 'Gridworld6x6RandomPPO'
dataset_postfix = 'Episode_dataset.pkl'

def get_6x6_dataset_path(episodes, base_path='/vol/bitbucket/phl23'):
    """Build the path of the pickled 6x6 gridworld dataset for an episode count.

    Args:
        episodes: Episode count that appears in the file name (e.g. ``50``).
        base_path: Directory holding the dataset pickles. Defaults to the
            directory that used to be hard-coded in the body, so existing
            one-argument calls return exactly the same path.

    Returns:
        str: ``'<base_path>/Gridworld6x6RandomPPO_<episodes>Episode_dataset.pkl'``.
    """
    # Kept as locals so the result never depends on same-named notebook
    # globals that other cells reassign.
    dataset_prefix = 'Gridworld6x6RandomPPO'
    dataset_postfix = 'Episode_dataset.pkl'
    return f'{base_path}/{dataset_prefix}_{episodes}{dataset_postfix}'

In [8]:
# Train and save one CQL model per 6x6 dataset size. A fresh model is built
# on every iteration so no weights carry over between dataset sizes.
# After the loop, `dataset` and `model` hold the 400-episode run, as before.
for n_episodes in (50, 100, 200, 400):
    dataset = load_dataset(get_6x6_dataset_path(n_episodes))
    model = get_CQL_model()
    train_and_save(dataset, model, f'CQL_Gridworld6x6_{n_episodes}Episode.d3')

50
[2m2024-08-01 16:46.50[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('uint8')], shape=[(3, 7, 7)]), action_signature=Signature(dtype=[dtype('int64')], shape=[(1,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.DISCRETE: 2>, action_size=3)[0m
[2m2024-08-01 16:46.50[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/DiscreteCQL_20240801164650[0m
[2m2024-08-01 16:46.50[0m [[32m[1mdebug    [0m] [1mBuilding models...            [0m
[2m2024-08-01 16:46.50[0m [[32m[1mdebug    [0m] [1mModels have been built.       [0m
[2m2024-08-01 16:46.50[0m [[32m[1minfo     [0m] [1mParameters                    [0m [36mparams[0m=[35m{'observation_shape': [3, 7, 7], 'action_size': 3, 'config': {'type': 'discrete_cql', 'params': {'batch_size': 32, 'gamma': 0.99, 'observation_scaler': {'type': 'none', 'params

Epoch 1/30: 100%|██████████| 1000/1000 [00:10<00:00, 96.13it/s, loss=0.839, td_loss=0.0522, conservative_loss=0.786]

[2m2024-08-01 16:47.01[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801164650: epoch=1 step=1000[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007700245380401612, 'time_algorithm_update': 0.009337636232376099, 'loss': 0.8373788753151894, 'td_loss': 0.05225765111995861, 'conservative_loss': 0.7851212240457535, 'time_step': 0.010280593633651733}[0m [36mstep[0m=[35m1000[0m



Epoch 2/30: 100%|██████████| 1000/1000 [00:11<00:00, 83.77it/s, loss=0.692, td_loss=0.0599, conservative_loss=0.632]

[2m2024-08-01 16:47.13[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801164650: epoch=2 step=2000[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000867828369140625, 'time_algorithm_update': 0.01071670651435852, 'loss': 0.6910480763614177, 'td_loss': 0.05995200999360532, 'conservative_loss': 0.6310960658490657, 'time_step': 0.011777905464172364}[0m [36mstep[0m=[35m2000[0m



Epoch 3/30: 100%|██████████| 1000/1000 [00:10<00:00, 93.69it/s, loss=0.631, td_loss=0.0635, conservative_loss=0.567]

[2m2024-08-01 16:47.23[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801164650: epoch=3 step=3000[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007915291786193848, 'time_algorithm_update': 0.009574694871902465, 'loss': 0.6310313407480717, 'td_loss': 0.06356817937921733, 'conservative_loss': 0.567463161110878, 'time_step': 0.010539673566818238}[0m [36mstep[0m=[35m3000[0m



Epoch 4/30: 100%|██████████| 1000/1000 [00:10<00:00, 94.24it/s, loss=0.611, td_loss=0.0631, conservative_loss=0.547]

[2m2024-08-01 16:47.34[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801164650: epoch=4 step=4000[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007897269725799561, 'time_algorithm_update': 0.00950147032737732, 'loss': 0.6101090370714665, 'td_loss': 0.06293568504136056, 'conservative_loss': 0.5471733515560627, 'time_step': 0.010475004434585572}[0m [36mstep[0m=[35m4000[0m



Epoch 5/30: 100%|██████████| 1000/1000 [00:09<00:00, 103.38it/s, loss=0.59, td_loss=0.065, conservative_loss=0.525] 

[2m2024-08-01 16:47.44[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801164650: epoch=5 step=5000[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007272629737854004, 'time_algorithm_update': 0.008654195547103881, 'loss': 0.5899604279696942, 'td_loss': 0.06506127235572785, 'conservative_loss': 0.5248991543054581, 'time_step': 0.009554600954055786}[0m [36mstep[0m=[35m5000[0m



Epoch 6/30: 100%|██████████| 1000/1000 [00:10<00:00, 93.60it/s, loss=0.582, td_loss=0.0629, conservative_loss=0.519]

[2m2024-08-01 16:47.54[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801164650: epoch=6 step=6000[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007881450653076171, 'time_algorithm_update': 0.009575380325317382, 'loss': 0.5816686831712723, 'td_loss': 0.06288101587770507, 'conservative_loss': 0.5187876671850681, 'time_step': 0.010547087907791138}[0m [36mstep[0m=[35m6000[0m



Epoch 7/30: 100%|██████████| 1000/1000 [00:10<00:00, 97.20it/s, loss=0.575, td_loss=0.0622, conservative_loss=0.512]

[2m2024-08-01 16:48.05[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801164650: epoch=7 step=7000[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000775726318359375, 'time_algorithm_update': 0.00921360421180725, 'loss': 0.5746119612157344, 'td_loss': 0.06220731190755032, 'conservative_loss': 0.5124046493470669, 'time_step': 0.010164766550064087}[0m [36mstep[0m=[35m7000[0m



Epoch 8/30: 100%|██████████| 1000/1000 [00:10<00:00, 93.63it/s, loss=0.57, td_loss=0.0609, conservative_loss=0.509] 

[2m2024-08-01 16:48.15[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801164650: epoch=8 step=8000[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007898883819580078, 'time_algorithm_update': 0.009574140310287476, 'loss': 0.5691181373000145, 'td_loss': 0.060827486439142375, 'conservative_loss': 0.5082906510829925, 'time_step': 0.010542705774307251}[0m [36mstep[0m=[35m8000[0m



Epoch 9/30: 100%|██████████| 1000/1000 [00:08<00:00, 119.88it/s, loss=0.557, td_loss=0.0556, conservative_loss=0.501]

[2m2024-08-01 16:48.24[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801164650: epoch=9 step=9000[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006385314464569092, 'time_algorithm_update': 0.0074515905380249025, 'loss': 0.5563439428806305, 'td_loss': 0.05556935998704284, 'conservative_loss': 0.500774583697319, 'time_step': 0.008249271631240845}[0m [36mstep[0m=[35m9000[0m



Epoch 10/30: 100%|██████████| 1000/1000 [00:09<00:00, 107.48it/s, loss=0.541, td_loss=0.0496, conservative_loss=0.491]


[2m2024-08-01 16:48.33[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801164650: epoch=10 step=10000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007047679424285889, 'time_algorithm_update': 0.008321346759796143, 'loss': 0.5410098036527634, 'td_loss': 0.04955430132476613, 'conservative_loss': 0.4914555027782917, 'time_step': 0.009192480564117431}[0m [36mstep[0m=[35m10000[0m


Epoch 11/30: 100%|██████████| 1000/1000 [00:08<00:00, 120.07it/s, loss=0.54, td_loss=0.0487, conservative_loss=0.492]

[2m2024-08-01 16:48.41[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801164650: epoch=11 step=11000[0m [36mepoch[0m=[35m11[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006409811973571777, 'time_algorithm_update': 0.007435963630676269, 'loss': 0.5403638392686844, 'td_loss': 0.04869130660779774, 'conservative_loss': 0.4916725327670574, 'time_step': 0.008236641883850098}[0m [36mstep[0m=[35m11000[0m



Epoch 12/30: 100%|██████████| 1000/1000 [00:09<00:00, 103.13it/s, loss=0.538, td_loss=0.0474, conservative_loss=0.491]


[2m2024-08-01 16:48.51[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801164650: epoch=12 step=12000[0m [36mepoch[0m=[35m12[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007539908885955811, 'time_algorithm_update': 0.008655474662780762, 'loss': 0.5386018519699574, 'td_loss': 0.04744251068588346, 'conservative_loss': 0.49115934151411056, 'time_step': 0.009577407836914062}[0m [36mstep[0m=[35m12000[0m


Epoch 13/30: 100%|██████████| 1000/1000 [00:10<00:00, 91.60it/s, loss=0.535, td_loss=0.0451, conservative_loss=0.489]

[2m2024-08-01 16:49.02[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801164650: epoch=13 step=13000[0m [36mepoch[0m=[35m13[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008370554447174072, 'time_algorithm_update': 0.009752032518386841, 'loss': 0.5343049141168594, 'td_loss': 0.04515546371554956, 'conservative_loss': 0.4891494493186474, 'time_step': 0.010773832082748413}[0m [36mstep[0m=[35m13000[0m



Epoch 14/30: 100%|██████████| 1000/1000 [00:11<00:00, 89.98it/s, loss=0.529, td_loss=0.0461, conservative_loss=0.483]

[2m2024-08-01 16:49.13[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801164650: epoch=14 step=14000[0m [36mepoch[0m=[35m14[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008709142208099365, 'time_algorithm_update': 0.009922554254531861, 'loss': 0.528484180405736, 'td_loss': 0.04598413243889809, 'conservative_loss': 0.48250004744529723, 'time_step': 0.010976277828216552}[0m [36mstep[0m=[35m14000[0m



Epoch 15/30: 100%|██████████| 1000/1000 [00:10<00:00, 92.23it/s, loss=0.525, td_loss=0.0451, conservative_loss=0.48] 

[2m2024-08-01 16:49.24[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801164650: epoch=15 step=15000[0m [36mepoch[0m=[35m15[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008711025714874267, 'time_algorithm_update': 0.009671895742416382, 'loss': 0.5250693652629852, 'td_loss': 0.04518549077631906, 'conservative_loss': 0.47988387443125247, 'time_step': 0.010717695474624634}[0m [36mstep[0m=[35m15000[0m



Epoch 16/30: 100%|██████████| 1000/1000 [00:11<00:00, 88.52it/s, loss=0.527, td_loss=0.0431, conservative_loss=0.484]

[2m2024-08-01 16:49.35[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801164650: epoch=16 step=16000[0m [36mepoch[0m=[35m16[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008745169639587402, 'time_algorithm_update': 0.010091766357421875, 'loss': 0.5267294543385506, 'td_loss': 0.04297354891290888, 'conservative_loss': 0.4837559049725533, 'time_step': 0.011150687694549561}[0m [36mstep[0m=[35m16000[0m



Epoch 17/30: 100%|██████████| 1000/1000 [00:09<00:00, 103.20it/s, loss=0.543, td_loss=0.0596, conservative_loss=0.484]

[2m2024-08-01 16:49.45[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801164650: epoch=17 step=17000[0m [36mepoch[0m=[35m17[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007851464748382568, 'time_algorithm_update': 0.008629236459732056, 'loss': 0.5431522980928422, 'td_loss': 0.059347999727353454, 'conservative_loss': 0.48380429860949514, 'time_step': 0.00958148741722107}[0m [36mstep[0m=[35m17000[0m



Epoch 18/30: 100%|██████████| 1000/1000 [00:11<00:00, 86.73it/s, loss=0.537, td_loss=0.0549, conservative_loss=0.482]

[2m2024-08-01 16:49.57[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801164650: epoch=18 step=18000[0m [36mepoch[0m=[35m18[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009063446521759034, 'time_algorithm_update': 0.010297032117843628, 'loss': 0.5363680323958397, 'td_loss': 0.0549151352243498, 'conservative_loss': 0.48145289766788485, 'time_step': 0.011390774965286255}[0m [36mstep[0m=[35m18000[0m



Epoch 19/30: 100%|██████████| 1000/1000 [00:12<00:00, 81.69it/s, loss=0.537, td_loss=0.0549, conservative_loss=0.483]


[2m2024-08-01 16:50.09[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801164650: epoch=19 step=19000[0m [36mepoch[0m=[35m19[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009394896030426026, 'time_algorithm_update': 0.010946101188659667, 'loss': 0.5373324351757764, 'td_loss': 0.05473905685450882, 'conservative_loss': 0.48259337750077247, 'time_step': 0.012079885482788086}[0m [36mstep[0m=[35m19000[0m


Epoch 20/30: 100%|██████████| 1000/1000 [00:12<00:00, 80.64it/s, loss=0.527, td_loss=0.054, conservative_loss=0.473] 


[2m2024-08-01 16:50.22[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801164650: epoch=20 step=20000[0m [36mepoch[0m=[35m20[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009809393882751465, 'time_algorithm_update': 0.011065498590469361, 'loss': 0.5274646827429533, 'td_loss': 0.05385019878181629, 'conservative_loss': 0.4736144836395979, 'time_step': 0.01223970890045166}[0m [36mstep[0m=[35m20000[0m


Epoch 21/30: 100%|██████████| 1000/1000 [00:10<00:00, 96.09it/s, loss=0.522, td_loss=0.0522, conservative_loss=0.47] 

[2m2024-08-01 16:50.33[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801164650: epoch=21 step=21000[0m [36mepoch[0m=[35m21[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008075408935546875, 'time_algorithm_update': 0.009299517393112182, 'loss': 0.5216895385086536, 'td_loss': 0.0520672712456435, 'conservative_loss': 0.4696222669482231, 'time_step': 0.01027755618095398}[0m [36mstep[0m=[35m21000[0m



Epoch 22/30: 100%|██████████| 1000/1000 [00:09<00:00, 106.92it/s, loss=0.531, td_loss=0.0532, conservative_loss=0.478]


[2m2024-08-01 16:50.42[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801164650: epoch=22 step=22000[0m [36mepoch[0m=[35m22[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007097456455230713, 'time_algorithm_update': 0.008388470649719239, 'loss': 0.531108517497778, 'td_loss': 0.053285181128419935, 'conservative_loss': 0.47782333606481553, 'time_step': 0.009254119157791138}[0m [36mstep[0m=[35m22000[0m


Epoch 23/30: 100%|██████████| 1000/1000 [00:08<00:00, 113.48it/s, loss=0.527, td_loss=0.0522, conservative_loss=0.475]

[2m2024-08-01 16:50.51[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801164650: epoch=23 step=23000[0m [36mepoch[0m=[35m23[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006573350429534912, 'time_algorithm_update': 0.00792833375930786, 'loss': 0.5273567415922881, 'td_loss': 0.05203731191437692, 'conservative_loss': 0.47531942996382714, 'time_step': 0.008728523254394532}[0m [36mstep[0m=[35m23000[0m



Epoch 24/30: 100%|██████████| 1000/1000 [00:09<00:00, 107.25it/s, loss=0.525, td_loss=0.0517, conservative_loss=0.474]

[2m2024-08-01 16:51.00[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801164650: epoch=24 step=24000[0m [36mepoch[0m=[35m24[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00066977858543396, 'time_algorithm_update': 0.008420065402984619, 'loss': 0.5252102850079536, 'td_loss': 0.05171602797228843, 'conservative_loss': 0.4734942567944527, 'time_step': 0.009231644630432129}[0m [36mstep[0m=[35m24000[0m



Epoch 25/30: 100%|██████████| 1000/1000 [00:09<00:00, 107.25it/s, loss=0.539, td_loss=0.0639, conservative_loss=0.475]

[2m2024-08-01 16:51.09[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801164650: epoch=25 step=25000[0m [36mepoch[0m=[35m25[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006754591464996338, 'time_algorithm_update': 0.008405486583709716, 'loss': 0.539557466506958, 'td_loss': 0.06414042572397739, 'conservative_loss': 0.4754170411378145, 'time_step': 0.009226742506027221}[0m [36mstep[0m=[35m25000[0m



Epoch 26/30: 100%|██████████| 1000/1000 [00:08<00:00, 111.46it/s, loss=0.543, td_loss=0.0637, conservative_loss=0.48]


[2m2024-08-01 16:51.18[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801164650: epoch=26 step=26000[0m [36mepoch[0m=[35m26[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006447148323059082, 'time_algorithm_update': 0.00810366678237915, 'loss': 0.5434521545171738, 'td_loss': 0.06380777799431235, 'conservative_loss': 0.47964437657594683, 'time_step': 0.008885921716690063}[0m [36mstep[0m=[35m26000[0m


Epoch 27/30: 100%|██████████| 1000/1000 [00:09<00:00, 109.66it/s, loss=0.536, td_loss=0.0613, conservative_loss=0.475]


[2m2024-08-01 16:51.27[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801164650: epoch=27 step=27000[0m [36mepoch[0m=[35m27[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006743390560150146, 'time_algorithm_update': 0.008213319063186645, 'loss': 0.5357925686240196, 'td_loss': 0.061384307869710025, 'conservative_loss': 0.47440826036036016, 'time_step': 0.009028785228729248}[0m [36mstep[0m=[35m27000[0m


Epoch 28/30: 100%|██████████| 1000/1000 [00:08<00:00, 112.79it/s, loss=0.539, td_loss=0.0618, conservative_loss=0.477]


[2m2024-08-01 16:51.36[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801164650: epoch=28 step=28000[0m [36mepoch[0m=[35m28[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006399099826812744, 'time_algorithm_update': 0.007999305486679077, 'loss': 0.5384131043404341, 'td_loss': 0.0616868359381333, 'conservative_loss': 0.4767262688577175, 'time_step': 0.008779579877853393}[0m [36mstep[0m=[35m28000[0m


Epoch 29/30: 100%|██████████| 1000/1000 [00:08<00:00, 112.68it/s, loss=0.54, td_loss=0.0615, conservative_loss=0.479]


[2m2024-08-01 16:51.45[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801164650: epoch=29 step=29000[0m [36mepoch[0m=[35m29[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006542947292327881, 'time_algorithm_update': 0.007989522695541382, 'loss': 0.5402810696661472, 'td_loss': 0.06153192064445466, 'conservative_loss': 0.47874914848804473, 'time_step': 0.008787793159484863}[0m [36mstep[0m=[35m29000[0m


Epoch 30/30: 100%|██████████| 1000/1000 [00:08<00:00, 111.35it/s, loss=0.534, td_loss=0.0625, conservative_loss=0.471]

[2m2024-08-01 16:51.54[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801164650: epoch=30 step=30000[0m [36mepoch[0m=[35m30[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006575171947479249, 'time_algorithm_update': 0.008096364974975586, 'loss': 0.5342231878787279, 'td_loss': 0.06256174182984978, 'conservative_loss': 0.4716614463478327, 'time_step': 0.008892950534820556}[0m [36mstep[0m=[35m30000[0m





100
[2m2024-08-01 16:51.55[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('uint8')], shape=[(3, 7, 7)]), action_signature=Signature(dtype=[dtype('int64')], shape=[(1,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.DISCRETE: 2>, action_size=3)[0m
[2m2024-08-01 16:51.55[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/DiscreteCQL_20240801165155[0m
[2m2024-08-01 16:51.55[0m [[32m[1mdebug    [0m] [1mBuilding models...            [0m
[2m2024-08-01 16:51.55[0m [[32m[1mdebug    [0m] [1mModels have been built.       [0m
[2m2024-08-01 16:51.55[0m [[32m[1minfo     [0m] [1mParameters                    [0m [36mparams[0m=[35m{'observation_shape': [3, 7, 7], 'action_size': 3, 'config': {'type': 'discrete_cql', 'params': {'batch_size': 32, 'gamma': 0.99, 'observation_scaler': {'type': 'none', 'param

Epoch 1/30: 100%|██████████| 1000/1000 [00:09<00:00, 106.77it/s, loss=0.843, td_loss=0.0609, conservative_loss=0.782]


[2m2024-08-01 16:52.04[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165155: epoch=1 step=1000[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006959433555603027, 'time_algorithm_update': 0.008431556224822999, 'loss': 0.8418922030627728, 'td_loss': 0.06074385117646307, 'conservative_loss': 0.7811483518183231, 'time_step': 0.009273027420043945}[0m [36mstep[0m=[35m1000[0m


Epoch 2/30: 100%|██████████| 1000/1000 [00:09<00:00, 108.46it/s, loss=0.724, td_loss=0.0552, conservative_loss=0.669]

[2m2024-08-01 16:52.13[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165155: epoch=2 step=2000[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006702632904052734, 'time_algorithm_update': 0.00832580852508545, 'loss': 0.7236943686306476, 'td_loss': 0.05520231819152832, 'conservative_loss': 0.6684920501708984, 'time_step': 0.009131468057632446}[0m [36mstep[0m=[35m2000[0m



Epoch 3/30: 100%|██████████| 1000/1000 [00:09<00:00, 109.81it/s, loss=0.699, td_loss=0.0518, conservative_loss=0.647]

[2m2024-08-01 16:52.22[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165155: epoch=3 step=3000[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006671578884124756, 'time_algorithm_update': 0.008200788736343384, 'loss': 0.6992212347984313, 'td_loss': 0.05191099138464779, 'conservative_loss': 0.6473102440834045, 'time_step': 0.009014403104782104}[0m [36mstep[0m=[35m3000[0m



Epoch 4/30: 100%|██████████| 1000/1000 [00:09<00:00, 110.24it/s, loss=0.688, td_loss=0.0514, conservative_loss=0.636]


[2m2024-08-01 16:52.32[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165155: epoch=4 step=4000[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006657357215881347, 'time_algorithm_update': 0.008181097269058227, 'loss': 0.6874903870820999, 'td_loss': 0.05142902808636427, 'conservative_loss': 0.6360613591372967, 'time_step': 0.00898490858078003}[0m [36mstep[0m=[35m4000[0m


Epoch 5/30: 100%|██████████| 1000/1000 [00:08<00:00, 111.93it/s, loss=0.677, td_loss=0.0489, conservative_loss=0.628]

[2m2024-08-01 16:52.41[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165155: epoch=5 step=5000[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006519174575805664, 'time_algorithm_update': 0.008059710741043091, 'loss': 0.6772215656936169, 'td_loss': 0.048929244115483016, 'conservative_loss': 0.6282923220098019, 'time_step': 0.008850927829742433}[0m [36mstep[0m=[35m5000[0m



Epoch 6/30: 100%|██████████| 1000/1000 [00:08<00:00, 114.31it/s, loss=0.674, td_loss=0.0494, conservative_loss=0.625]

[2m2024-08-01 16:52.49[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165155: epoch=6 step=6000[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006533205509185791, 'time_algorithm_update': 0.007879392862319947, 'loss': 0.6743240205943585, 'td_loss': 0.04943207343388349, 'conservative_loss': 0.6248919477164745, 'time_step': 0.008668357610702514}[0m [36mstep[0m=[35m6000[0m



Epoch 7/30: 100%|██████████| 1000/1000 [00:09<00:00, 108.85it/s, loss=0.677, td_loss=0.0489, conservative_loss=0.628]

[2m2024-08-01 16:52.58[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165155: epoch=7 step=7000[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006720390319824218, 'time_algorithm_update': 0.008284194946289063, 'loss': 0.6772809426188469, 'td_loss': 0.04886351478961296, 'conservative_loss': 0.6284174274206161, 'time_step': 0.00909589672088623}[0m [36mstep[0m=[35m7000[0m



Epoch 8/30: 100%|██████████| 1000/1000 [00:09<00:00, 109.81it/s, loss=0.663, td_loss=0.0475, conservative_loss=0.616]

[2m2024-08-01 16:53.08[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165155: epoch=8 step=8000[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006661539077758789, 'time_algorithm_update': 0.008209465026855468, 'loss': 0.6633799999058246, 'td_loss': 0.04756751444074325, 'conservative_loss': 0.6158124851286412, 'time_step': 0.009017913579940796}[0m [36mstep[0m=[35m8000[0m



Epoch 9/30: 100%|██████████| 1000/1000 [00:08<00:00, 115.53it/s, loss=0.669, td_loss=0.0628, conservative_loss=0.606]

[2m2024-08-01 16:53.16[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165155: epoch=9 step=9000[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006504640579223633, 'time_algorithm_update': 0.0077716739177703855, 'loss': 0.6692908017635345, 'td_loss': 0.06277141640428453, 'conservative_loss': 0.6065193856358528, 'time_step': 0.008569605112075806}[0m [36mstep[0m=[35m9000[0m



Epoch 10/30: 100%|██████████| 1000/1000 [00:08<00:00, 111.73it/s, loss=0.655, td_loss=0.0548, conservative_loss=0.6] 

[2m2024-08-01 16:53.25[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165155: epoch=10 step=10000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006614794731140137, 'time_algorithm_update': 0.008067350387573242, 'loss': 0.6548039131760597, 'td_loss': 0.054718632528558374, 'conservative_loss': 0.6000852808654308, 'time_step': 0.008867210626602173}[0m [36mstep[0m=[35m10000[0m



Epoch 11/30: 100%|██████████| 1000/1000 [00:09<00:00, 110.34it/s, loss=0.648, td_loss=0.0492, conservative_loss=0.599]

[2m2024-08-01 16:53.34[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165155: epoch=11 step=11000[0m [36mepoch[0m=[35m11[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006645958423614502, 'time_algorithm_update': 0.008172011137008666, 'loss': 0.6479013892114163, 'td_loss': 0.04916343433596194, 'conservative_loss': 0.5987379545271396, 'time_step': 0.008974724054336547}[0m [36mstep[0m=[35m11000[0m



Epoch 12/30: 100%|██████████| 1000/1000 [00:09<00:00, 105.70it/s, loss=0.636, td_loss=0.0473, conservative_loss=0.589]

[2m2024-08-01 16:53.44[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165155: epoch=12 step=12000[0m [36mepoch[0m=[35m12[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006933915615081787, 'time_algorithm_update': 0.008524036169052123, 'loss': 0.6349477190971374, 'td_loss': 0.047086801146157084, 'conservative_loss': 0.5878609177172184, 'time_step': 0.00936211633682251}[0m [36mstep[0m=[35m12000[0m



Epoch 13/30: 100%|██████████| 1000/1000 [00:09<00:00, 110.64it/s, loss=0.638, td_loss=0.046, conservative_loss=0.592]

[2m2024-08-01 16:53.53[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165155: epoch=13 step=13000[0m [36mepoch[0m=[35m13[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006608424186706543, 'time_algorithm_update': 0.008143312215805054, 'loss': 0.6368659809529781, 'td_loss': 0.04590417528897524, 'conservative_loss': 0.5909618058502674, 'time_step': 0.008947570085525512}[0m [36mstep[0m=[35m13000[0m



Epoch 14/30: 100%|██████████| 1000/1000 [00:08<00:00, 117.96it/s, loss=0.625, td_loss=0.0463, conservative_loss=0.578]

[2m2024-08-01 16:54.01[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165155: epoch=14 step=14000[0m [36mepoch[0m=[35m14[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006449639797210694, 'time_algorithm_update': 0.00761318302154541, 'loss': 0.6250357170701026, 'td_loss': 0.046258309442549946, 'conservative_loss': 0.5787774080336094, 'time_step': 0.008394911766052246}[0m [36mstep[0m=[35m14000[0m



Epoch 15/30: 100%|██████████| 1000/1000 [00:08<00:00, 113.55it/s, loss=0.618, td_loss=0.0453, conservative_loss=0.573]

[2m2024-08-01 16:54.10[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165155: epoch=15 step=15000[0m [36mepoch[0m=[35m15[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006535565853118897, 'time_algorithm_update': 0.007921424150466918, 'loss': 0.6178034926652909, 'td_loss': 0.045291262211278084, 'conservative_loss': 0.5725122305452823, 'time_step': 0.008720815658569336}[0m [36mstep[0m=[35m15000[0m



Epoch 16/30: 100%|██████████| 1000/1000 [00:09<00:00, 107.10it/s, loss=0.626, td_loss=0.045, conservative_loss=0.581]

[2m2024-08-01 16:54.19[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165155: epoch=16 step=16000[0m [36mepoch[0m=[35m16[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000686521053314209, 'time_algorithm_update': 0.008409698247909545, 'loss': 0.6257044024765491, 'td_loss': 0.045000983820296825, 'conservative_loss': 0.5807034193575382, 'time_step': 0.009242039918899537}[0m [36mstep[0m=[35m16000[0m



Epoch 17/30: 100%|██████████| 1000/1000 [00:09<00:00, 109.42it/s, loss=0.632, td_loss=0.0619, conservative_loss=0.57]

[2m2024-08-01 16:54.29[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165155: epoch=17 step=17000[0m [36mepoch[0m=[35m17[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006670708656311035, 'time_algorithm_update': 0.00823174524307251, 'loss': 0.6312299635410309, 'td_loss': 0.06181308315414935, 'conservative_loss': 0.5694168809056283, 'time_step': 0.009046627044677734}[0m [36mstep[0m=[35m17000[0m



Epoch 18/30: 100%|██████████| 1000/1000 [00:08<00:00, 116.73it/s, loss=0.619, td_loss=0.0554, conservative_loss=0.564]

[2m2024-08-01 16:54.37[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165155: epoch=18 step=18000[0m [36mepoch[0m=[35m18[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006308977603912354, 'time_algorithm_update': 0.007724953889846802, 'loss': 0.6190753297507763, 'td_loss': 0.055400307798758146, 'conservative_loss': 0.5636750220060348, 'time_step': 0.008487914562225342}[0m [36mstep[0m=[35m18000[0m



Epoch 19/30: 100%|██████████| 1000/1000 [00:08<00:00, 111.55it/s, loss=0.617, td_loss=0.0547, conservative_loss=0.562]

[2m2024-08-01 16:54.46[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165155: epoch=19 step=19000[0m [36mepoch[0m=[35m19[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006672766208648682, 'time_algorithm_update': 0.008067676544189453, 'loss': 0.6171102287471294, 'td_loss': 0.05474732398288325, 'conservative_loss': 0.5623629048168659, 'time_step': 0.008873704195022583}[0m [36mstep[0m=[35m19000[0m



Epoch 20/30: 100%|██████████| 1000/1000 [00:09<00:00, 109.53it/s, loss=0.617, td_loss=0.0538, conservative_loss=0.563]


[2m2024-08-01 16:54.55[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165155: epoch=20 step=20000[0m [36mepoch[0m=[35m20[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000678001880645752, 'time_algorithm_update': 0.008225090026855468, 'loss': 0.6166936525404453, 'td_loss': 0.053877193483989685, 'conservative_loss': 0.5628164582252503, 'time_step': 0.009043653964996339}[0m [36mstep[0m=[35m20000[0m


Epoch 21/30: 100%|██████████| 1000/1000 [00:09<00:00, 108.16it/s, loss=0.614, td_loss=0.0535, conservative_loss=0.56]

[2m2024-08-01 16:55.05[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165155: epoch=21 step=21000[0m [36mepoch[0m=[35m21[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000640103816986084, 'time_algorithm_update': 0.008393300294876099, 'loss': 0.6138881583809852, 'td_loss': 0.05344054067181423, 'conservative_loss': 0.5604476180970669, 'time_step': 0.009167042970657348}[0m [36mstep[0m=[35m21000[0m



Epoch 22/30: 100%|██████████| 1000/1000 [00:10<00:00, 99.29it/s, loss=0.61, td_loss=0.0525, conservative_loss=0.558] 


[2m2024-08-01 16:55.15[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165155: epoch=22 step=22000[0m [36mepoch[0m=[35m22[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007244236469268799, 'time_algorithm_update': 0.009085851192474365, 'loss': 0.609911151021719, 'td_loss': 0.05249224593536928, 'conservative_loss': 0.5574189046025276, 'time_step': 0.009966180324554444}[0m [36mstep[0m=[35m22000[0m


Epoch 23/30: 100%|██████████| 1000/1000 [00:09<00:00, 109.49it/s, loss=0.597, td_loss=0.0522, conservative_loss=0.545]

[2m2024-08-01 16:55.24[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165155: epoch=23 step=23000[0m [36mepoch[0m=[35m23[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006422550678253174, 'time_algorithm_update': 0.00828326153755188, 'loss': 0.5966836862266064, 'td_loss': 0.052091113653732465, 'conservative_loss': 0.5445925724804401, 'time_step': 0.009059673547744751}[0m [36mstep[0m=[35m23000[0m



Epoch 24/30: 100%|██████████| 1000/1000 [00:09<00:00, 105.60it/s, loss=0.59, td_loss=0.0529, conservative_loss=0.537]

[2m2024-08-01 16:55.33[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165155: epoch=24 step=24000[0m [36mepoch[0m=[35m24[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000644479513168335, 'time_algorithm_update': 0.008611259460449218, 'loss': 0.5898942232728005, 'td_loss': 0.05291709862882271, 'conservative_loss': 0.5369771245121956, 'time_step': 0.009388304948806762}[0m [36mstep[0m=[35m24000[0m



Epoch 25/30: 100%|██████████| 1000/1000 [00:09<00:00, 106.02it/s, loss=0.598, td_loss=0.068, conservative_loss=0.53] 

[2m2024-08-01 16:55.43[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165155: epoch=25 step=25000[0m [36mepoch[0m=[35m25[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006386981010437011, 'time_algorithm_update': 0.008585206508636475, 'loss': 0.5978603624999523, 'td_loss': 0.06797891111765057, 'conservative_loss': 0.5298814519047738, 'time_step': 0.00935507583618164}[0m [36mstep[0m=[35m25000[0m



Epoch 26/30: 100%|██████████| 1000/1000 [00:09<00:00, 109.27it/s, loss=0.588, td_loss=0.0643, conservative_loss=0.524]


[2m2024-08-01 16:55.52[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165155: epoch=26 step=26000[0m [36mepoch[0m=[35m26[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006426568031311035, 'time_algorithm_update': 0.008305039167404174, 'loss': 0.5883510216921568, 'td_loss': 0.06447873564250767, 'conservative_loss': 0.523872285604477, 'time_step': 0.009074762582778931}[0m [36mstep[0m=[35m26000[0m


Epoch 27/30: 100%|██████████| 1000/1000 [00:09<00:00, 101.74it/s, loss=0.585, td_loss=0.0643, conservative_loss=0.52]

[2m2024-08-01 16:56.02[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165155: epoch=27 step=27000[0m [36mepoch[0m=[35m27[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006597933769226074, 'time_algorithm_update': 0.008944133520126343, 'loss': 0.5847258841693401, 'td_loss': 0.0641868375299964, 'conservative_loss': 0.520539046227932, 'time_step': 0.009741906881332398}[0m [36mstep[0m=[35m27000[0m



Epoch 28/30: 100%|██████████| 1000/1000 [00:09<00:00, 103.76it/s, loss=0.578, td_loss=0.0646, conservative_loss=0.513]

[2m2024-08-01 16:56.11[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165155: epoch=28 step=28000[0m [36mepoch[0m=[35m28[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006594390869140625, 'time_algorithm_update': 0.008749510049819947, 'loss': 0.5782437467575073, 'td_loss': 0.06482705985801294, 'conservative_loss': 0.5134166868031025, 'time_step': 0.009548375606536865}[0m [36mstep[0m=[35m28000[0m



Epoch 29/30: 100%|██████████| 1000/1000 [00:09<00:00, 101.86it/s, loss=0.572, td_loss=0.066, conservative_loss=0.506]


[2m2024-08-01 16:56.21[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165155: epoch=29 step=29000[0m [36mepoch[0m=[35m29[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006423752307891846, 'time_algorithm_update': 0.008946021318435669, 'loss': 0.571931579053402, 'td_loss': 0.06600988796725869, 'conservative_loss': 0.5059216913133859, 'time_step': 0.009729520082473755}[0m [36mstep[0m=[35m29000[0m


Epoch 30/30: 100%|██████████| 1000/1000 [00:09<00:00, 104.28it/s, loss=0.573, td_loss=0.0653, conservative_loss=0.508]

[2m2024-08-01 16:56.31[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165155: epoch=30 step=30000[0m [36mepoch[0m=[35m30[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006400241851806641, 'time_algorithm_update': 0.00873309874534607, 'loss': 0.5729221959114075, 'td_loss': 0.06513689399929717, 'conservative_loss': 0.507785302489996, 'time_step': 0.009506165027618408}[0m [36mstep[0m=[35m30000[0m





200
[2m2024-08-01 16:56.31[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('uint8')], shape=[(3, 7, 7)]), action_signature=Signature(dtype=[dtype('int64')], shape=[(1,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.DISCRETE: 2>, action_size=3)[0m
[2m2024-08-01 16:56.31[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/DiscreteCQL_20240801165631[0m
[2m2024-08-01 16:56.31[0m [[32m[1mdebug    [0m] [1mBuilding models...            [0m
[2m2024-08-01 16:56.31[0m [[32m[1mdebug    [0m] [1mModels have been built.       [0m
[2m2024-08-01 16:56.31[0m [[32m[1minfo     [0m] [1mParameters                    [0m [36mparams[0m=[35m{'observation_shape': [3, 7, 7], 'action_size': 3, 'config': {'type': 'discrete_cql', 'params': {'batch_size': 32, 'gamma': 0.99, 'observation_scaler': {'type': 'none', 'param

Epoch 1/30: 100%|██████████| 1000/1000 [00:09<00:00, 104.49it/s, loss=0.82, td_loss=0.0528, conservative_loss=0.767]


[2m2024-08-01 16:56.41[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165631: epoch=1 step=1000[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006794476509094238, 'time_algorithm_update': 0.008676171541213988, 'loss': 0.8189535508155823, 'td_loss': 0.05300265932129696, 'conservative_loss': 0.7659508911967278, 'time_step': 0.00948643159866333}[0m [36mstep[0m=[35m1000[0m


Epoch 2/30: 100%|██████████| 1000/1000 [00:09<00:00, 110.65it/s, loss=0.661, td_loss=0.0685, conservative_loss=0.592]

[2m2024-08-01 16:56.50[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165631: epoch=2 step=2000[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006287522315979004, 'time_algorithm_update': 0.008207820415496826, 'loss': 0.6609359018504619, 'td_loss': 0.0685852788668126, 'conservative_loss': 0.5923506229519844, 'time_step': 0.00896359658241272}[0m [36mstep[0m=[35m2000[0m



Epoch 3/30: 100%|██████████| 1000/1000 [00:09<00:00, 104.78it/s, loss=0.623, td_loss=0.0706, conservative_loss=0.553]

[2m2024-08-01 16:57.00[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165631: epoch=3 step=3000[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000643291711807251, 'time_algorithm_update': 0.008690287590026855, 'loss': 0.6230593519508839, 'td_loss': 0.07048123521404341, 'conservative_loss': 0.5525781163573265, 'time_step': 0.009465803861618043}[0m [36mstep[0m=[35m3000[0m



Epoch 4/30: 100%|██████████| 1000/1000 [00:09<00:00, 103.77it/s, loss=0.611, td_loss=0.0735, conservative_loss=0.538]

[2m2024-08-01 16:57.09[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165631: epoch=4 step=4000[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000657433032989502, 'time_algorithm_update': 0.008767049074172974, 'loss': 0.611829783409834, 'td_loss': 0.07371163318492473, 'conservative_loss': 0.5381181503534317, 'time_step': 0.00955706787109375}[0m [36mstep[0m=[35m4000[0m



Epoch 5/30: 100%|██████████| 1000/1000 [00:08<00:00, 114.79it/s, loss=0.601, td_loss=0.0734, conservative_loss=0.527]

[2m2024-08-01 16:57.18[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165631: epoch=5 step=5000[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006394259929656983, 'time_algorithm_update': 0.007861262559890747, 'loss': 0.6006836615204811, 'td_loss': 0.07345510358782485, 'conservative_loss': 0.5272285578250885, 'time_step': 0.008636521816253661}[0m [36mstep[0m=[35m5000[0m



Epoch 6/30: 100%|██████████| 1000/1000 [00:09<00:00, 101.66it/s, loss=0.598, td_loss=0.0734, conservative_loss=0.525]

[2m2024-08-01 16:57.28[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165631: epoch=6 step=6000[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006570365428924561, 'time_algorithm_update': 0.008961407899856567, 'loss': 0.5981146441400051, 'td_loss': 0.07330647052638233, 'conservative_loss': 0.5248081726431847, 'time_step': 0.00975451159477234}[0m [36mstep[0m=[35m6000[0m



Epoch 7/30: 100%|██████████| 1000/1000 [00:09<00:00, 105.60it/s, loss=0.593, td_loss=0.0729, conservative_loss=0.52]

[2m2024-08-01 16:57.37[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165631: epoch=7 step=7000[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006537001132965088, 'time_algorithm_update': 0.008595687150955201, 'loss': 0.5926358420848846, 'td_loss': 0.07293699250323699, 'conservative_loss': 0.5196988488435745, 'time_step': 0.009381872892379762}[0m [36mstep[0m=[35m7000[0m



Epoch 8/30: 100%|██████████| 1000/1000 [00:09<00:00, 103.47it/s, loss=0.586, td_loss=0.0739, conservative_loss=0.513]

[2m2024-08-01 16:57.47[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165631: epoch=8 step=8000[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006654376983642578, 'time_algorithm_update': 0.008779852628707886, 'loss': 0.5859991990029811, 'td_loss': 0.07372183213103563, 'conservative_loss': 0.5122773665189743, 'time_step': 0.009578665256500245}[0m [36mstep[0m=[35m8000[0m



Epoch 9/30: 100%|██████████| 1000/1000 [00:09<00:00, 103.56it/s, loss=0.585, td_loss=0.0672, conservative_loss=0.517]


[2m2024-08-01 16:57.57[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165631: epoch=9 step=9000[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006694366931915283, 'time_algorithm_update': 0.008764177322387696, 'loss': 0.5833528935611249, 'td_loss': 0.06689918656367809, 'conservative_loss': 0.5164537068009376, 'time_step': 0.009569265365600587}[0m [36mstep[0m=[35m9000[0m


Epoch 10/30: 100%|██████████| 1000/1000 [00:09<00:00, 102.45it/s, loss=0.582, td_loss=0.0619, conservative_loss=0.52]

[2m2024-08-01 16:58.06[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165631: epoch=10 step=10000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000661841869354248, 'time_algorithm_update': 0.008875583887100219, 'loss': 0.581266762316227, 'td_loss': 0.06188708851672709, 'conservative_loss': 0.5193796744048595, 'time_step': 0.009673274278640747}[0m [36mstep[0m=[35m10000[0m



Epoch 11/30: 100%|██████████| 1000/1000 [00:10<00:00, 98.34it/s, loss=0.576, td_loss=0.0612, conservative_loss=0.515]

[2m2024-08-01 16:58.17[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165631: epoch=11 step=11000[0m [36mepoch[0m=[35m11[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007063238620758056, 'time_algorithm_update': 0.0092111918926239, 'loss': 0.5761566311120987, 'td_loss': 0.06133536791941151, 'conservative_loss': 0.5148212625086308, 'time_step': 0.010062703847885131}[0m [36mstep[0m=[35m11000[0m



Epoch 12/30: 100%|██████████| 1000/1000 [00:09<00:00, 101.43it/s, loss=0.576, td_loss=0.0624, conservative_loss=0.513]

[2m2024-08-01 16:58.26[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165631: epoch=12 step=12000[0m [36mepoch[0m=[35m12[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006716949939727783, 'time_algorithm_update': 0.008966580390930175, 'loss': 0.5761074874699116, 'td_loss': 0.062420912160305306, 'conservative_loss': 0.5136865751445293, 'time_step': 0.009773106813430786}[0m [36mstep[0m=[35m12000[0m



Epoch 13/30: 100%|██████████| 1000/1000 [00:09<00:00, 103.47it/s, loss=0.575, td_loss=0.0621, conservative_loss=0.512]


[2m2024-08-01 16:58.36[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165631: epoch=13 step=13000[0m [36mepoch[0m=[35m13[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006580190658569335, 'time_algorithm_update': 0.008789427280426025, 'loss': 0.5748715488612652, 'td_loss': 0.062277092013973745, 'conservative_loss': 0.5125944557189941, 'time_step': 0.009584305047988892}[0m [36mstep[0m=[35m13000[0m


Epoch 14/30: 100%|██████████| 1000/1000 [00:10<00:00, 98.00it/s, loss=0.571, td_loss=0.0627, conservative_loss=0.509]

[2m2024-08-01 16:58.46[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165631: epoch=14 step=14000[0m [36mepoch[0m=[35m14[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006961102485656738, 'time_algorithm_update': 0.009268474817276, 'loss': 0.5718169732391835, 'td_loss': 0.06280673839524388, 'conservative_loss': 0.5090102348327636, 'time_step': 0.010111067771911621}[0m [36mstep[0m=[35m14000[0m



Epoch 15/30: 100%|██████████| 1000/1000 [00:09<00:00, 105.29it/s, loss=0.574, td_loss=0.0604, conservative_loss=0.514]

[2m2024-08-01 16:58.56[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165631: epoch=15 step=15000[0m [36mepoch[0m=[35m15[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006571366786956788, 'time_algorithm_update': 0.00862851619720459, 'loss': 0.5736434590220452, 'td_loss': 0.060419033003039656, 'conservative_loss': 0.5132244256734848, 'time_step': 0.009419756889343262}[0m [36mstep[0m=[35m15000[0m



Epoch 16/30: 100%|██████████| 1000/1000 [00:09<00:00, 103.61it/s, loss=0.564, td_loss=0.0623, conservative_loss=0.502]


[2m2024-08-01 16:59.05[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165631: epoch=16 step=16000[0m [36mepoch[0m=[35m16[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006618740558624268, 'time_algorithm_update': 0.00877630877494812, 'loss': 0.5642969590723514, 'td_loss': 0.06250408035609871, 'conservative_loss': 0.5017928782999516, 'time_step': 0.009570161342620849}[0m [36mstep[0m=[35m16000[0m


Epoch 17/30: 100%|██████████| 1000/1000 [00:10<00:00, 94.23it/s, loss=0.589, td_loss=0.0807, conservative_loss=0.508]


[2m2024-08-01 16:59.16[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165631: epoch=17 step=17000[0m [36mepoch[0m=[35m17[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007858552932739258, 'time_algorithm_update': 0.009546635627746582, 'loss': 0.5882832390367985, 'td_loss': 0.08050290203373879, 'conservative_loss': 0.5077803376317024, 'time_step': 0.01049462628364563}[0m [36mstep[0m=[35m17000[0m


Epoch 18/30: 100%|██████████| 1000/1000 [00:10<00:00, 91.07it/s, loss=0.587, td_loss=0.0752, conservative_loss=0.512]


[2m2024-08-01 16:59.27[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165631: epoch=18 step=18000[0m [36mepoch[0m=[35m18[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008694698810577392, 'time_algorithm_update': 0.009800813674926757, 'loss': 0.5869553363919258, 'td_loss': 0.07500442668003962, 'conservative_loss': 0.5119509096741677, 'time_step': 0.010851606607437133}[0m [36mstep[0m=[35m18000[0m


Epoch 19/30: 100%|██████████| 1000/1000 [00:09<00:00, 106.13it/s, loss=0.589, td_loss=0.0756, conservative_loss=0.513]


[2m2024-08-01 16:59.37[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165631: epoch=19 step=19000[0m [36mepoch[0m=[35m19[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007436096668243409, 'time_algorithm_update': 0.008394023180007934, 'loss': 0.58788553327322, 'td_loss': 0.07538407720718533, 'conservative_loss': 0.5125014555454254, 'time_step': 0.009308188915252686}[0m [36mstep[0m=[35m19000[0m


Epoch 20/30: 100%|██████████| 1000/1000 [00:11<00:00, 88.17it/s, loss=0.577, td_loss=0.0739, conservative_loss=0.503]

[2m2024-08-01 16:59.48[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165631: epoch=20 step=20000[0m [36mepoch[0m=[35m20[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007443108558654785, 'time_algorithm_update': 0.010325307607650757, 'loss': 0.5770918093025684, 'td_loss': 0.07379504036530853, 'conservative_loss': 0.5032967689037323, 'time_step': 0.011231645345687866}[0m [36mstep[0m=[35m20000[0m



Epoch 21/30: 100%|██████████| 1000/1000 [00:09<00:00, 103.25it/s, loss=0.581, td_loss=0.0741, conservative_loss=0.507]

[2m2024-08-01 16:59.58[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165631: epoch=21 step=21000[0m [36mepoch[0m=[35m21[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006516287326812744, 'time_algorithm_update': 0.008815166711807251, 'loss': 0.5808317349255085, 'td_loss': 0.07398650279268622, 'conservative_loss': 0.5068452329337597, 'time_step': 0.009602588891983033}[0m [36mstep[0m=[35m21000[0m



Epoch 22/30: 100%|██████████| 1000/1000 [00:10<00:00, 96.56it/s, loss=0.582, td_loss=0.0743, conservative_loss=0.507]


[2m2024-08-01 17:00.08[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165631: epoch=22 step=22000[0m [36mepoch[0m=[35m22[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007735581398010254, 'time_algorithm_update': 0.009270158290863037, 'loss': 0.5802364587783814, 'td_loss': 0.07391463636839762, 'conservative_loss': 0.5063218225240708, 'time_step': 0.01022337293624878}[0m [36mstep[0m=[35m22000[0m


Epoch 23/30: 100%|██████████| 1000/1000 [00:10<00:00, 94.89it/s, loss=0.576, td_loss=0.0739, conservative_loss=0.502]


[2m2024-08-01 17:00.19[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165631: epoch=23 step=23000[0m [36mepoch[0m=[35m23[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008096842765808105, 'time_algorithm_update': 0.009428564310073852, 'loss': 0.5754106575548649, 'td_loss': 0.07383528103725985, 'conservative_loss': 0.5015753768384457, 'time_step': 0.010414897680282593}[0m [36mstep[0m=[35m23000[0m


Epoch 24/30: 100%|██████████| 1000/1000 [00:10<00:00, 91.51it/s, loss=0.579, td_loss=0.0743, conservative_loss=0.505]


[2m2024-08-01 17:00.30[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165631: epoch=24 step=24000[0m [36mepoch[0m=[35m24[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008685827255249023, 'time_algorithm_update': 0.009742688417434692, 'loss': 0.5792126190066338, 'td_loss': 0.07430133030051365, 'conservative_loss': 0.5049112888574601, 'time_step': 0.010792457818984986}[0m [36mstep[0m=[35m24000[0m


Epoch 25/30: 100%|██████████| 1000/1000 [00:09<00:00, 106.46it/s, loss=0.588, td_loss=0.0864, conservative_loss=0.502]

[2m2024-08-01 17:00.39[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165631: epoch=25 step=25000[0m [36mepoch[0m=[35m25[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000761671781539917, 'time_algorithm_update': 0.008362396717071533, 'loss': 0.587617859184742, 'td_loss': 0.08623278113920242, 'conservative_loss': 0.5013850777447224, 'time_step': 0.009286612033843995}[0m [36mstep[0m=[35m25000[0m



Epoch 26/30: 100%|██████████| 1000/1000 [00:09<00:00, 107.91it/s, loss=0.584, td_loss=0.0796, conservative_loss=0.504]


[2m2024-08-01 17:00.48[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165631: epoch=26 step=26000[0m [36mepoch[0m=[35m26[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006502525806427002, 'time_algorithm_update': 0.008401347637176513, 'loss': 0.5842703235149384, 'td_loss': 0.07988850425486453, 'conservative_loss': 0.5043818196952343, 'time_step': 0.009186868906021118}[0m [36mstep[0m=[35m26000[0m


Epoch 27/30: 100%|██████████| 1000/1000 [00:09<00:00, 108.14it/s, loss=0.588, td_loss=0.0811, conservative_loss=0.507]

[2m2024-08-01 17:00.58[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165631: epoch=27 step=27000[0m [36mepoch[0m=[35m27[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006455080509185791, 'time_algorithm_update': 0.008384893417358399, 'loss': 0.5875113739371299, 'td_loss': 0.08099465010361746, 'conservative_loss': 0.5065167244821787, 'time_step': 0.009162232160568237}[0m [36mstep[0m=[35m27000[0m



Epoch 28/30: 100%|██████████| 1000/1000 [00:09<00:00, 107.60it/s, loss=0.588, td_loss=0.0819, conservative_loss=0.506]

[2m2024-08-01 17:01.07[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165631: epoch=28 step=28000[0m [36mepoch[0m=[35m28[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000686786413192749, 'time_algorithm_update': 0.008382120370864868, 'loss': 0.5883990431725978, 'td_loss': 0.08217467076610774, 'conservative_loss': 0.5062243727445602, 'time_step': 0.00920749306678772}[0m [36mstep[0m=[35m28000[0m



Epoch 29/30: 100%|██████████| 1000/1000 [00:09<00:00, 109.25it/s, loss=0.585, td_loss=0.0812, conservative_loss=0.504]

[2m2024-08-01 17:01.16[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165631: epoch=29 step=29000[0m [36mepoch[0m=[35m29[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006593904495239258, 'time_algorithm_update': 0.008277243137359619, 'loss': 0.5851619395613671, 'td_loss': 0.08099357592733578, 'conservative_loss': 0.5041683629155159, 'time_step': 0.009073993444442749}[0m [36mstep[0m=[35m29000[0m



Epoch 30/30: 100%|██████████| 1000/1000 [00:11<00:00, 87.48it/s, loss=0.585, td_loss=0.0807, conservative_loss=0.504]


[2m2024-08-01 17:01.28[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801165631: epoch=30 step=30000[0m [36mepoch[0m=[35m30[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008660736083984375, 'time_algorithm_update': 0.010224739789962769, 'loss': 0.5847840510904789, 'td_loss': 0.08065743552148343, 'conservative_loss': 0.5041266159713268, 'time_step': 0.011278284072875977}[0m [36mstep[0m=[35m30000[0m
400
[2m2024-08-01 17:01.28[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('uint8')], shape=[(3, 7, 7)]), action_signature=Signature(dtype=[dtype('int64')], shape=[(1,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.DISCRETE: 2>, action_size=3)[0m
[2m2024-08-01 17:01.28[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/DiscreteCQL_20240801170128[0m
[2m2024-08-01 17:01.28[0m [[32m[1mdebug    [0m] 

Epoch 1/30: 100%|██████████| 1000/1000 [00:10<00:00, 94.91it/s, loss=0.868, td_loss=0.0539, conservative_loss=0.814]

[2m2024-08-01 17:01.39[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801170128: epoch=1 step=1000[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008260834217071533, 'time_algorithm_update': 0.009405628442764283, 'loss': 0.866164543390274, 'td_loss': 0.053817540767602624, 'conservative_loss': 0.8123470023870468, 'time_step': 0.010410383701324463}[0m [36mstep[0m=[35m1000[0m



Epoch 2/30: 100%|██████████| 1000/1000 [00:12<00:00, 81.84it/s, loss=0.731, td_loss=0.0633, conservative_loss=0.668]


[2m2024-08-01 17:01.51[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801170128: epoch=2 step=2000[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0009476592540740967, 'time_algorithm_update': 0.010919328927993775, 'loss': 0.7311883037090301, 'td_loss': 0.06331702529964968, 'conservative_loss': 0.6678712792694569, 'time_step': 0.012059542655944824}[0m [36mstep[0m=[35m2000[0m


Epoch 3/30: 100%|██████████| 1000/1000 [00:11<00:00, 90.16it/s, loss=0.689, td_loss=0.0692, conservative_loss=0.62] 

[2m2024-08-01 17:02.02[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801170128: epoch=3 step=3000[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008672885894775391, 'time_algorithm_update': 0.009908074378967286, 'loss': 0.6885417395532131, 'td_loss': 0.06907490029232577, 'conservative_loss': 0.6194668393731118, 'time_step': 0.010955539703369141}[0m [36mstep[0m=[35m3000[0m



Epoch 4/30: 100%|██████████| 1000/1000 [00:10<00:00, 91.77it/s, loss=0.66, td_loss=0.0753, conservative_loss=0.585] 

[2m2024-08-01 17:02.13[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801170128: epoch=4 step=4000[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008634819984436035, 'time_algorithm_update': 0.009719582319259644, 'loss': 0.6593099030256271, 'td_loss': 0.07514191086078062, 'conservative_loss': 0.5841679919064044, 'time_step': 0.010762885093688964}[0m [36mstep[0m=[35m4000[0m



Epoch 5/30: 100%|██████████| 1000/1000 [00:10<00:00, 93.27it/s, loss=0.657, td_loss=0.0774, conservative_loss=0.58] 


[2m2024-08-01 17:02.24[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801170128: epoch=5 step=5000[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008509066104888916, 'time_algorithm_update': 0.009559374332427978, 'loss': 0.6574817831218243, 'td_loss': 0.07744223594479263, 'conservative_loss': 0.5800395461022854, 'time_step': 0.010591002464294433}[0m [36mstep[0m=[35m5000[0m


Epoch 6/30: 100%|██████████| 1000/1000 [00:11<00:00, 88.58it/s, loss=0.647, td_loss=0.0786, conservative_loss=0.568]


[2m2024-08-01 17:02.35[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801170128: epoch=6 step=6000[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008274977207183838, 'time_algorithm_update': 0.010156074285507202, 'loss': 0.6464652967453003, 'td_loss': 0.07847062799707055, 'conservative_loss': 0.5679946681857109, 'time_step': 0.01115632152557373}[0m [36mstep[0m=[35m6000[0m


Epoch 7/30: 100%|██████████| 1000/1000 [00:11<00:00, 89.15it/s, loss=0.641, td_loss=0.0784, conservative_loss=0.562]


[2m2024-08-01 17:02.46[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801170128: epoch=7 step=7000[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006307165622711181, 'time_algorithm_update': 0.010367899656295776, 'loss': 0.6404950215816497, 'td_loss': 0.07830012642266229, 'conservative_loss': 0.562194894194603, 'time_step': 0.01113174867630005}[0m [36mstep[0m=[35m7000[0m


Epoch 8/30: 100%|██████████| 1000/1000 [00:11<00:00, 85.52it/s, loss=0.634, td_loss=0.0791, conservative_loss=0.555]

[2m2024-08-01 17:02.58[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801170128: epoch=8 step=8000[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006439740657806397, 'time_algorithm_update': 0.010823108673095704, 'loss': 0.6338491795063019, 'td_loss': 0.07892152950027957, 'conservative_loss': 0.5549276500940323, 'time_step': 0.011603718996047974}[0m [36mstep[0m=[35m8000[0m



Epoch 9/30: 100%|██████████| 1000/1000 [00:11<00:00, 90.36it/s, loss=0.629, td_loss=0.0761, conservative_loss=0.553]

[2m2024-08-01 17:03.09[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801170128: epoch=9 step=9000[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006292376518249512, 'time_algorithm_update': 0.010219846725463867, 'loss': 0.628590033262968, 'td_loss': 0.07602353802975267, 'conservative_loss': 0.5525664954185486, 'time_step': 0.010980933666229249}[0m [36mstep[0m=[35m9000[0m



Epoch 10/30: 100%|██████████| 1000/1000 [00:11<00:00, 90.63it/s, loss=0.614, td_loss=0.0719, conservative_loss=0.542]

[2m2024-08-01 17:03.20[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801170128: epoch=10 step=10000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006270830631256104, 'time_algorithm_update': 0.010194346189498901, 'loss': 0.6137369413077831, 'td_loss': 0.07195887097204104, 'conservative_loss': 0.5417780703306199, 'time_step': 0.01095167636871338}[0m [36mstep[0m=[35m10000[0m



Epoch 11/30: 100%|██████████| 1000/1000 [00:11<00:00, 85.89it/s, loss=0.62, td_loss=0.0688, conservative_loss=0.551] 

[2m2024-08-01 17:03.32[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801170128: epoch=11 step=11000[0m [36mepoch[0m=[35m11[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006345140933990478, 'time_algorithm_update': 0.010772571325302124, 'loss': 0.6195843545496463, 'td_loss': 0.06885518394876272, 'conservative_loss': 0.5507291711270809, 'time_step': 0.011542802572250366}[0m [36mstep[0m=[35m11000[0m



Epoch 12/30: 100%|██████████| 1000/1000 [00:11<00:00, 87.62it/s, loss=0.616, td_loss=0.0707, conservative_loss=0.546]

[2m2024-08-01 17:03.43[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801170128: epoch=12 step=12000[0m [36mepoch[0m=[35m12[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006176185607910156, 'time_algorithm_update': 0.010580179214477538, 'loss': 0.6161397307813168, 'td_loss': 0.07070776007603854, 'conservative_loss': 0.5454319702386856, 'time_step': 0.011328962802886963}[0m [36mstep[0m=[35m12000[0m



Epoch 13/30: 100%|██████████| 1000/1000 [00:11<00:00, 90.09it/s, loss=0.602, td_loss=0.071, conservative_loss=0.531] 

[2m2024-08-01 17:03.54[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801170128: epoch=13 step=13000[0m [36mepoch[0m=[35m13[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006323509216308594, 'time_algorithm_update': 0.010252932786941529, 'loss': 0.6024921226203441, 'td_loss': 0.07102308058179915, 'conservative_loss': 0.5314690427482128, 'time_step': 0.011015882968902588}[0m [36mstep[0m=[35m13000[0m



Epoch 14/30: 100%|██████████| 1000/1000 [00:11<00:00, 87.26it/s, loss=0.608, td_loss=0.0703, conservative_loss=0.538]

[2m2024-08-01 17:04.06[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801170128: epoch=14 step=14000[0m [36mepoch[0m=[35m14[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006420884132385254, 'time_algorithm_update': 0.010597335577011108, 'loss': 0.6082133322656155, 'td_loss': 0.07045097375055775, 'conservative_loss': 0.537762358546257, 'time_step': 0.011372707605361939}[0m [36mstep[0m=[35m14000[0m



Epoch 15/30: 100%|██████████| 1000/1000 [00:11<00:00, 87.38it/s, loss=0.606, td_loss=0.0713, conservative_loss=0.535]

[2m2024-08-01 17:04.17[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801170128: epoch=15 step=15000[0m [36mepoch[0m=[35m15[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006291227340698242, 'time_algorithm_update': 0.010591469764709472, 'loss': 0.6061627653837204, 'td_loss': 0.07132541132671759, 'conservative_loss': 0.5348373540639877, 'time_step': 0.011357306718826295}[0m [36mstep[0m=[35m15000[0m



Epoch 16/30: 100%|██████████| 1000/1000 [00:11<00:00, 89.26it/s, loss=0.594, td_loss=0.0705, conservative_loss=0.524]

[2m2024-08-01 17:04.28[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801170128: epoch=16 step=16000[0m [36mepoch[0m=[35m16[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006153149604797364, 'time_algorithm_update': 0.010369308233261109, 'loss': 0.5947334567010403, 'td_loss': 0.07062486591516062, 'conservative_loss': 0.5241085905432701, 'time_step': 0.011118645668029786}[0m [36mstep[0m=[35m16000[0m



Epoch 17/30: 100%|██████████| 1000/1000 [00:11<00:00, 87.21it/s, loss=0.631, td_loss=0.0985, conservative_loss=0.533]

[2m2024-08-01 17:04.40[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801170128: epoch=17 step=17000[0m [36mepoch[0m=[35m17[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006309497356414795, 'time_algorithm_update': 0.01061816668510437, 'loss': 0.6310209077596665, 'td_loss': 0.09852769904676825, 'conservative_loss': 0.5324932086765766, 'time_step': 0.011380764484405517}[0m [36mstep[0m=[35m17000[0m



Epoch 18/30: 100%|██████████| 1000/1000 [00:11<00:00, 84.58it/s, loss=0.629, td_loss=0.0938, conservative_loss=0.535]

[2m2024-08-01 17:04.52[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801170128: epoch=18 step=18000[0m [36mepoch[0m=[35m18[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006423470973968506, 'time_algorithm_update': 0.01095554494857788, 'loss': 0.6290208424031735, 'td_loss': 0.09393978282250463, 'conservative_loss': 0.5350810599029064, 'time_step': 0.011736235857009888}[0m [36mstep[0m=[35m18000[0m



Epoch 19/30: 100%|██████████| 1000/1000 [00:11<00:00, 87.86it/s, loss=0.633, td_loss=0.0929, conservative_loss=0.54] 

[2m2024-08-01 17:05.03[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801170128: epoch=19 step=19000[0m [36mepoch[0m=[35m19[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006254141330718994, 'time_algorithm_update': 0.010539954662322997, 'loss': 0.6324902084767818, 'td_loss': 0.09281357997749001, 'conservative_loss': 0.5396766285300255, 'time_step': 0.011299062967300415}[0m [36mstep[0m=[35m19000[0m



Epoch 20/30: 100%|██████████| 1000/1000 [00:10<00:00, 92.72it/s, loss=0.626, td_loss=0.0931, conservative_loss=0.533]

[2m2024-08-01 17:05.14[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801170128: epoch=20 step=20000[0m [36mepoch[0m=[35m20[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006396279335021972, 'time_algorithm_update': 0.009931198120117187, 'loss': 0.6259212038516998, 'td_loss': 0.09299977832008154, 'conservative_loss': 0.5329214254617691, 'time_step': 0.010703089475631714}[0m [36mstep[0m=[35m20000[0m



Epoch 21/30: 100%|██████████| 1000/1000 [00:09<00:00, 105.41it/s, loss=0.614, td_loss=0.0917, conservative_loss=0.522]

[2m2024-08-01 17:05.23[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801170128: epoch=21 step=21000[0m [36mepoch[0m=[35m21[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006384861469268799, 'time_algorithm_update': 0.008634754657745361, 'loss': 0.6143526683151722, 'td_loss': 0.0918640068480745, 'conservative_loss': 0.5224886603951454, 'time_step': 0.00940371322631836}[0m [36mstep[0m=[35m21000[0m



Epoch 22/30: 100%|██████████| 1000/1000 [00:11<00:00, 89.85it/s, loss=0.618, td_loss=0.0909, conservative_loss=0.527]

[2m2024-08-01 17:05.35[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801170128: epoch=22 step=22000[0m [36mepoch[0m=[35m22[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006994476318359375, 'time_algorithm_update': 0.010179765701293945, 'loss': 0.6189669780135155, 'td_loss': 0.09112621097080409, 'conservative_loss': 0.5278407673686742, 'time_step': 0.011026369333267212}[0m [36mstep[0m=[35m22000[0m



Epoch 23/30: 100%|██████████| 1000/1000 [00:11<00:00, 87.40it/s, loss=0.627, td_loss=0.0916, conservative_loss=0.535]

[2m2024-08-01 17:05.46[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801170128: epoch=23 step=23000[0m [36mepoch[0m=[35m23[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006141252517700195, 'time_algorithm_update': 0.010614391565322876, 'loss': 0.6266463538408279, 'td_loss': 0.09157445140834898, 'conservative_loss': 0.5350719027519226, 'time_step': 0.011356947660446167}[0m [36mstep[0m=[35m23000[0m



Epoch 24/30: 100%|██████████| 1000/1000 [00:11<00:00, 88.51it/s, loss=0.622, td_loss=0.0908, conservative_loss=0.531]

[2m2024-08-01 17:05.57[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801170128: epoch=24 step=24000[0m [36mepoch[0m=[35m24[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006167867183685303, 'time_algorithm_update': 0.010466880798339843, 'loss': 0.6215354864895344, 'td_loss': 0.09067158848326654, 'conservative_loss': 0.5308638980686664, 'time_step': 0.011213768005371093}[0m [36mstep[0m=[35m24000[0m



Epoch 25/30: 100%|██████████| 1000/1000 [00:11<00:00, 88.98it/s, loss=0.633, td_loss=0.0963, conservative_loss=0.537]

[2m2024-08-01 17:06.09[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801170128: epoch=25 step=25000[0m [36mepoch[0m=[35m25[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000619748592376709, 'time_algorithm_update': 0.010397894620895385, 'loss': 0.6332242967188358, 'td_loss': 0.09616645747143775, 'conservative_loss': 0.5370578392744064, 'time_step': 0.01115419864654541}[0m [36mstep[0m=[35m25000[0m



Epoch 26/30: 100%|██████████| 1000/1000 [00:11<00:00, 88.60it/s, loss=0.617, td_loss=0.0902, conservative_loss=0.527]

[2m2024-08-01 17:06.20[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801170128: epoch=26 step=26000[0m [36mepoch[0m=[35m26[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006371266841888427, 'time_algorithm_update': 0.010435509443283081, 'loss': 0.6181754731237888, 'td_loss': 0.09071279105171562, 'conservative_loss': 0.5274626816511154, 'time_step': 0.011202145099639892}[0m [36mstep[0m=[35m26000[0m



Epoch 27/30: 100%|██████████| 1000/1000 [00:11<00:00, 87.03it/s, loss=0.622, td_loss=0.0894, conservative_loss=0.533]

[2m2024-08-01 17:06.31[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801170128: epoch=27 step=27000[0m [36mepoch[0m=[35m27[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006497888565063476, 'time_algorithm_update': 0.010610073566436768, 'loss': 0.6235115283727646, 'td_loss': 0.08973612484987825, 'conservative_loss': 0.5337754031419754, 'time_step': 0.011397919178009034}[0m [36mstep[0m=[35m27000[0m



Epoch 28/30: 100%|██████████| 1000/1000 [00:11<00:00, 88.60it/s, loss=0.616, td_loss=0.0898, conservative_loss=0.527]

[2m2024-08-01 17:06.43[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801170128: epoch=28 step=28000[0m [36mepoch[0m=[35m28[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000621584415435791, 'time_algorithm_update': 0.01045151138305664, 'loss': 0.6164745986759662, 'td_loss': 0.08977384504908696, 'conservative_loss': 0.5267007526159286, 'time_step': 0.011202389001846313}[0m [36mstep[0m=[35m28000[0m



Epoch 29/30: 100%|██████████| 1000/1000 [00:11<00:00, 87.96it/s, loss=0.624, td_loss=0.0897, conservative_loss=0.534]

[2m2024-08-01 17:06.54[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801170128: epoch=29 step=29000[0m [36mepoch[0m=[35m29[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006327157020568848, 'time_algorithm_update': 0.010513904333114624, 'loss': 0.6231881947517395, 'td_loss': 0.08954450190765784, 'conservative_loss': 0.5336436922848224, 'time_step': 0.011278674602508545}[0m [36mstep[0m=[35m29000[0m



Epoch 30/30: 100%|██████████| 1000/1000 [00:11<00:00, 89.79it/s, loss=0.625, td_loss=0.0916, conservative_loss=0.533]


[2m2024-08-01 17:07.05[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20240801170128: epoch=30 step=30000[0m [36mepoch[0m=[35m30[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000640822172164917, 'time_algorithm_update': 0.01027775764465332, 'loss': 0.6243636483550071, 'td_loss': 0.09126118901185691, 'conservative_loss': 0.533102459192276, 'time_step': 0.011054337978363037}[0m [36mstep[0m=[35m30000[0m


True

In [9]:
# Train a DiscreteBCQ model on each 6x6 Gridworld dataset size and save every
# trained model to its own file.
#
# The episode counts live in one tuple so the dataset path and the save name
# are always derived from the same number (previously each run was a
# copy-pasted stanza where the two could drift apart).
BCQ_6X6_EPISODE_COUNTS = (50, 100, 200, 400)

for n_episodes in BCQ_6X6_EPISODE_COUNTS:
    # NOTE(review): get_6x6_dataset_path is assumed to mirror
    # get_5x5_dataset_path (episode count -> pickle path) -- confirm.
    dataset = load_dataset(get_6x6_dataset_path(n_episodes))
    # Build a new model on every iteration so each run starts from a freshly
    # created model instead of continuing from the previous dataset's training.
    model = get_BCQ_model()
    train_and_save(dataset, model, f'BCQ_Gridworld6x6_{n_episodes}Episode.d3')

50
[2m2024-08-01 17:07.06[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('uint8')], shape=[(3, 7, 7)]), action_signature=Signature(dtype=[dtype('int64')], shape=[(1,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.DISCRETE: 2>, action_size=3)[0m
[2m2024-08-01 17:07.06[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/DiscreteBCQ_20240801170706[0m
[2m2024-08-01 17:07.06[0m [[32m[1mdebug    [0m] [1mBuilding models...            [0m
[2m2024-08-01 17:07.06[0m [[32m[1mdebug    [0m] [1mModels have been built.       [0m
[2m2024-08-01 17:07.06[0m [[32m[1minfo     [0m] [1mParameters                    [0m [36mparams[0m=[35m{'observation_shape': [3, 7, 7], 'action_size': 3, 'config': {'type': 'discrete_bcq', 'params': {'batch_size': 32, 'gamma': 0.99, 'observation_scaler': {'type': 'none', 'params

Epoch 1/30: 100%|██████████| 1000/1000 [00:13<00:00, 76.02it/s, loss=1.59, td_loss=0.00935, imitator_loss=1.58]

[2m2024-08-01 17:07.19[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801170706: epoch=1 step=1000[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006109046936035157, 'time_algorithm_update': 0.012316709518432617, 'loss': 1.5886025751829147, 'td_loss': 0.009264025834145286, 'imitator_loss': 1.5793385508060456, 'time_step': 0.013059298992156982}[0m [36mstep[0m=[35m1000[0m



Epoch 2/30: 100%|██████████| 1000/1000 [00:13<00:00, 73.73it/s, loss=1.54, td_loss=0.000105, imitator_loss=1.54]

[2m2024-08-01 17:07.32[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801170706: epoch=2 step=2000[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006228432655334472, 'time_algorithm_update': 0.012706108808517457, 'loss': 1.535148327946663, 'td_loss': 0.00010425755071082677, 'imitator_loss': 1.5350440708398818, 'time_step': 0.013462979555130004}[0m [36mstep[0m=[35m2000[0m



Epoch 3/30: 100%|██████████| 1000/1000 [00:13<00:00, 76.11it/s, loss=1.53, td_loss=6.89e-5, imitator_loss=1.53]

[2m2024-08-01 17:07.46[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801170706: epoch=3 step=3000[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006404800415039062, 'time_algorithm_update': 0.012255290746688843, 'loss': 1.526074580192566, 'td_loss': 6.88103527390922e-05, 'imitator_loss': 1.526005769968033, 'time_step': 0.01303500747680664}[0m [36mstep[0m=[35m3000[0m



Epoch 4/30: 100%|██████████| 1000/1000 [00:13<00:00, 74.94it/s, loss=1.52, td_loss=6.25e-5, imitator_loss=1.52]

[2m2024-08-01 17:07.59[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801170706: epoch=4 step=4000[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006281046867370605, 'time_algorithm_update': 0.0124835045337677, 'loss': 1.5225781872272492, 'td_loss': 6.242089436727837e-05, 'imitator_loss': 1.5225157669782639, 'time_step': 0.013248987197875976}[0m [36mstep[0m=[35m4000[0m



Epoch 5/30: 100%|██████████| 1000/1000 [00:12<00:00, 78.61it/s, loss=1.52, td_loss=5.94e-5, imitator_loss=1.52]

[2m2024-08-01 17:08.12[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801170706: epoch=5 step=5000[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006276612281799316, 'time_algorithm_update': 0.011869191646575928, 'loss': 1.5221344357728959, 'td_loss': 5.929947170352534e-05, 'imitator_loss': 1.522075135231018, 'time_step': 0.01263093638420105}[0m [36mstep[0m=[35m5000[0m



Epoch 6/30: 100%|██████████| 1000/1000 [00:10<00:00, 92.64it/s, loss=1.52, td_loss=5.49e-5, imitator_loss=1.52]

[2m2024-08-01 17:08.22[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801170706: epoch=6 step=6000[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006502707004547119, 'time_algorithm_update': 0.009914581775665282, 'loss': 1.5213078812360763, 'td_loss': 5.503508678702929e-05, 'imitator_loss': 1.5212528462409973, 'time_step': 0.01070023775100708}[0m [36mstep[0m=[35m6000[0m



Epoch 7/30: 100%|██████████| 1000/1000 [00:12<00:00, 78.34it/s, loss=1.52, td_loss=5.48e-5, imitator_loss=1.52]

[2m2024-08-01 17:08.35[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801170706: epoch=7 step=7000[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006399204730987549, 'time_algorithm_update': 0.011892714262008667, 'loss': 1.5209320640563966, 'td_loss': 5.4638115879242834e-05, 'imitator_loss': 1.520877424955368, 'time_step': 0.012667674303054809}[0m [36mstep[0m=[35m7000[0m



Epoch 8/30: 100%|██████████| 1000/1000 [00:12<00:00, 78.62it/s, loss=1.52, td_loss=5.37e-5, imitator_loss=1.52]

[2m2024-08-01 17:08.48[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801170706: epoch=8 step=8000[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006090559959411621, 'time_algorithm_update': 0.01188875961303711, 'loss': 1.5213365795612335, 'td_loss': 5.383045889084315e-05, 'imitator_loss': 1.521282748579979, 'time_step': 0.012626482725143433}[0m [36mstep[0m=[35m8000[0m



Epoch 9/30: 100%|██████████| 1000/1000 [00:13<00:00, 76.18it/s, loss=1.52, td_loss=0.00523, imitator_loss=1.52]

[2m2024-08-01 17:09.01[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801170706: epoch=9 step=9000[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006232531070709228, 'time_algorithm_update': 0.012278524160385131, 'loss': 1.5248191415071488, 'td_loss': 0.005190373541874578, 'imitator_loss': 1.5196287684440613, 'time_step': 0.013033319473266601}[0m [36mstep[0m=[35m9000[0m



Epoch 10/30: 100%|██████████| 1000/1000 [00:13<00:00, 74.06it/s, loss=1.52, td_loss=0.000209, imitator_loss=1.52]

[2m2024-08-01 17:09.15[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801170706: epoch=10 step=10000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006445331573486328, 'time_algorithm_update': 0.012622098922729492, 'loss': 1.520163741827011, 'td_loss': 0.0002089131831053237, 'imitator_loss': 1.5199548287391662, 'time_step': 0.013397918701171876}[0m [36mstep[0m=[35m10000[0m



Epoch 11/30: 100%|██████████| 1000/1000 [00:12<00:00, 79.52it/s, loss=1.52, td_loss=0.000125, imitator_loss=1.52]


[2m2024-08-01 17:09.27[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801170706: epoch=11 step=11000[0m [36mepoch[0m=[35m11[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006033506393432617, 'time_algorithm_update': 0.011753999233245849, 'loss': 1.5198186498880386, 'td_loss': 0.00012446262048797506, 'imitator_loss': 1.5196941884756088, 'time_step': 0.012487056255340577}[0m [36mstep[0m=[35m11000[0m


Epoch 12/30: 100%|██████████| 1000/1000 [00:13<00:00, 76.38it/s, loss=1.52, td_loss=9.99e-5, imitator_loss=1.52]

[2m2024-08-01 17:09.40[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801170706: epoch=12 step=12000[0m [36mepoch[0m=[35m12[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006323397159576416, 'time_algorithm_update': 0.01222846269607544, 'loss': 1.5194545559883117, 'td_loss': 0.00010031620803783881, 'imitator_loss': 1.519354238152504, 'time_step': 0.012996098041534425}[0m [36mstep[0m=[35m12000[0m



Epoch 13/30: 100%|██████████| 1000/1000 [00:12<00:00, 79.83it/s, loss=1.52, td_loss=9.61e-5, imitator_loss=1.52]

[2m2024-08-01 17:09.53[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801170706: epoch=13 step=13000[0m [36mepoch[0m=[35m13[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006003186702728271, 'time_algorithm_update': 0.011714478254318237, 'loss': 1.5204345660209655, 'td_loss': 9.59907237229345e-05, 'imitator_loss': 1.5203385745286941, 'time_step': 0.01243948745727539}[0m [36mstep[0m=[35m13000[0m



Epoch 14/30: 100%|██████████| 1000/1000 [00:13<00:00, 74.55it/s, loss=1.52, td_loss=9.03e-5, imitator_loss=1.52]

[2m2024-08-01 17:10.06[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801170706: epoch=14 step=14000[0m [36mepoch[0m=[35m14[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006309938430786133, 'time_algorithm_update': 0.012554741859436036, 'loss': 1.5195069044828415, 'td_loss': 9.01860296507948e-05, 'imitator_loss': 1.519416717529297, 'time_step': 0.013315684080123902}[0m [36mstep[0m=[35m14000[0m



Epoch 15/30: 100%|██████████| 1000/1000 [00:12<00:00, 77.56it/s, loss=1.52, td_loss=9.59e-5, imitator_loss=1.52]

[2m2024-08-01 17:10.19[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801170706: epoch=15 step=15000[0m [36mepoch[0m=[35m15[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006327776908874511, 'time_algorithm_update': 0.012028982639312744, 'loss': 1.5193891121149063, 'td_loss': 9.583460968133294e-05, 'imitator_loss': 1.5192932765483855, 'time_step': 0.012798588037490845}[0m [36mstep[0m=[35m15000[0m



Epoch 16/30: 100%|██████████| 1000/1000 [00:12<00:00, 77.08it/s, loss=1.52, td_loss=8.32e-5, imitator_loss=1.52]

[2m2024-08-01 17:10.32[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801170706: epoch=16 step=16000[0m [36mepoch[0m=[35m16[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006119601726531982, 'time_algorithm_update': 0.01214632511138916, 'loss': 1.519143289089203, 'td_loss': 8.308181739630526e-05, 'imitator_loss': 1.5190602070093155, 'time_step': 0.012887657403945922}[0m [36mstep[0m=[35m16000[0m



Epoch 17/30: 100%|██████████| 1000/1000 [00:13<00:00, 76.11it/s, loss=1.53, td_loss=0.011, imitator_loss=1.52]


[2m2024-08-01 17:10.45[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801170706: epoch=17 step=17000[0m [36mepoch[0m=[35m17[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006199851036071777, 'time_algorithm_update': 0.012287028312683105, 'loss': 1.5316945533752442, 'td_loss': 0.01101222594257706, 'imitator_loss': 1.5206823275089263, 'time_step': 0.013040657997131347}[0m [36mstep[0m=[35m17000[0m


Epoch 18/30: 100%|██████████| 1000/1000 [00:13<00:00, 75.28it/s, loss=1.53, td_loss=0.00732, imitator_loss=1.52]

[2m2024-08-01 17:10.59[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801170706: epoch=18 step=18000[0m [36mepoch[0m=[35m18[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006202924251556397, 'time_algorithm_update': 0.012431955575942993, 'loss': 1.5272230132818223, 'td_loss': 0.0072847950921423035, 'imitator_loss': 1.5199382197856903, 'time_step': 0.013184571504592896}[0m [36mstep[0m=[35m18000[0m



Epoch 19/30: 100%|██████████| 1000/1000 [00:12<00:00, 81.19it/s, loss=1.53, td_loss=0.00857, imitator_loss=1.52]

[2m2024-08-01 17:11.11[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801170706: epoch=19 step=19000[0m [36mepoch[0m=[35m19[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007089321613311767, 'time_algorithm_update': 0.011330652475357056, 'loss': 1.5283918075561524, 'td_loss': 0.00873029113341181, 'imitator_loss': 1.5196615142822265, 'time_step': 0.012196791172027589}[0m [36mstep[0m=[35m19000[0m



Epoch 20/30: 100%|██████████| 1000/1000 [00:12<00:00, 80.49it/s, loss=1.53, td_loss=0.0106, imitator_loss=1.52]


[2m2024-08-01 17:11.23[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801170706: epoch=20 step=20000[0m [36mepoch[0m=[35m20[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008421809673309326, 'time_algorithm_update': 0.011265880346298218, 'loss': 1.530283191561699, 'td_loss': 0.01050803775069653, 'imitator_loss': 1.5197751544713973, 'time_step': 0.012282315492630005}[0m [36mstep[0m=[35m20000[0m


Epoch 21/30: 100%|██████████| 1000/1000 [00:12<00:00, 82.03it/s, loss=1.53, td_loss=0.00758, imitator_loss=1.52]

[2m2024-08-01 17:11.36[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801170706: epoch=21 step=21000[0m [36mepoch[0m=[35m21[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007846131324768066, 'time_algorithm_update': 0.011084076166152954, 'loss': 1.5276915369033814, 'td_loss': 0.007554011235322833, 'imitator_loss': 1.5201375247240068, 'time_step': 0.01204187250137329}[0m [36mstep[0m=[35m21000[0m



Epoch 22/30: 100%|██████████| 1000/1000 [00:12<00:00, 79.26it/s, loss=1.53, td_loss=0.0116, imitator_loss=1.52] 

[2m2024-08-01 17:11.48[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801170706: epoch=22 step=22000[0m [36mepoch[0m=[35m22[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007208065986633301, 'time_algorithm_update': 0.011593333959579467, 'loss': 1.531142753124237, 'td_loss': 0.011589488304896804, 'imitator_loss': 1.5195532633066178, 'time_step': 0.012482917070388793}[0m [36mstep[0m=[35m22000[0m



Epoch 23/30: 100%|██████████| 1000/1000 [00:11<00:00, 86.92it/s, loss=1.53, td_loss=0.00959, imitator_loss=1.52] 

[2m2024-08-01 17:12.00[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801170706: epoch=23 step=23000[0m [36mepoch[0m=[35m23[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007359843254089355, 'time_algorithm_update': 0.010465934038162232, 'loss': 1.5298426438570023, 'td_loss': 0.009614151802452398, 'imitator_loss': 1.5202284940481186, 'time_step': 0.011369465112686158}[0m [36mstep[0m=[35m23000[0m



Epoch 24/30: 100%|██████████| 1000/1000 [00:10<00:00, 96.16it/s, loss=1.53, td_loss=0.0116, imitator_loss=1.52] 

[2m2024-08-01 17:12.10[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801170706: epoch=24 step=24000[0m [36mepoch[0m=[35m24[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007082843780517579, 'time_algorithm_update': 0.009411270141601563, 'loss': 1.5305831456184387, 'td_loss': 0.011564707228419138, 'imitator_loss': 1.5190184389352799, 'time_step': 0.010282143592834473}[0m [36mstep[0m=[35m24000[0m



Epoch 25/30: 100%|██████████| 1000/1000 [00:11<00:00, 87.04it/s, loss=1.54, td_loss=0.0161, imitator_loss=1.52]


[2m2024-08-01 17:12.22[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801170706: epoch=25 step=25000[0m [36mepoch[0m=[35m25[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007400689125061035, 'time_algorithm_update': 0.01043737530708313, 'loss': 1.537474374294281, 'td_loss': 0.016211249031242915, 'imitator_loss': 1.521263125181198, 'time_step': 0.011349516868591309}[0m [36mstep[0m=[35m25000[0m


Epoch 26/30: 100%|██████████| 1000/1000 [00:11<00:00, 90.82it/s, loss=1.53, td_loss=0.0136, imitator_loss=1.52]

[2m2024-08-01 17:12.33[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801170706: epoch=26 step=26000[0m [36mepoch[0m=[35m26[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007676587104797363, 'time_algorithm_update': 0.009951107978820801, 'loss': 1.5335483107566834, 'td_loss': 0.013556337655929384, 'imitator_loss': 1.5199919737577439, 'time_step': 0.010884760141372681}[0m [36mstep[0m=[35m26000[0m



Epoch 27/30: 100%|██████████| 1000/1000 [00:15<00:00, 64.74it/s, loss=1.54, td_loss=0.0155, imitator_loss=1.52]

[2m2024-08-01 17:12.49[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801170706: epoch=27 step=27000[0m [36mepoch[0m=[35m27[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008546040058135986, 'time_algorithm_update': 0.014246254444122315, 'loss': 1.5353352663517, 'td_loss': 0.015448723859910387, 'imitator_loss': 1.519886543393135, 'time_step': 0.01528393530845642}[0m [36mstep[0m=[35m27000[0m



Epoch 28/30: 100%|██████████| 1000/1000 [00:13<00:00, 76.08it/s, loss=1.53, td_loss=0.012, imitator_loss=1.52] 


[2m2024-08-01 17:13.02[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801170706: epoch=28 step=28000[0m [36mepoch[0m=[35m28[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000816392183303833, 'time_algorithm_update': 0.01201344060897827, 'loss': 1.5323958628177643, 'td_loss': 0.011928526034753304, 'imitator_loss': 1.5204673368930817, 'time_step': 0.013001080513000488}[0m [36mstep[0m=[35m28000[0m


Epoch 29/30: 100%|██████████| 1000/1000 [00:10<00:00, 92.29it/s, loss=1.53, td_loss=0.00825, imitator_loss=1.52]

[2m2024-08-01 17:13.13[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801170706: epoch=29 step=29000[0m [36mepoch[0m=[35m29[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007054789066314697, 'time_algorithm_update': 0.009818776845932006, 'loss': 1.528001947760582, 'td_loss': 0.008240078899136279, 'imitator_loss': 1.5197618693113326, 'time_step': 0.01070027756690979}[0m [36mstep[0m=[35m29000[0m



Epoch 30/30: 100%|██████████| 1000/1000 [00:11<00:00, 87.23it/s, loss=1.53, td_loss=0.0128, imitator_loss=1.52] 


[2m2024-08-01 17:13.24[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801170706: epoch=30 step=30000[0m [36mepoch[0m=[35m30[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007592580318450928, 'time_algorithm_update': 0.01040456509590149, 'loss': 1.5320711979866029, 'td_loss': 0.012866736384872637, 'imitator_loss': 1.5192044640779496, 'time_step': 0.01133060908317566}[0m [36mstep[0m=[35m30000[0m
100
[2m2024-08-01 17:13.25[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('uint8')], shape=[(3, 7, 7)]), action_signature=Signature(dtype=[dtype('int64')], shape=[(1,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.DISCRETE: 2>, action_size=3)[0m
[2m2024-08-01 17:13.25[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/DiscreteBCQ_20240801171325[0m
[2m2024-08-01 17:13.25[0m [[32m[1mdebug    [0m] [1mB

Epoch 1/30: 100%|██████████| 1000/1000 [00:11<00:00, 87.77it/s, loss=1.6, td_loss=0.0106, imitator_loss=1.59] 


[2m2024-08-01 17:13.37[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171325: epoch=1 step=1000[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007498362064361573, 'time_algorithm_update': 0.010352374792098998, 'loss': 1.600712368965149, 'td_loss': 0.010501703268884739, 'imitator_loss': 1.590210665345192, 'time_step': 0.011269023418426513}[0m [36mstep[0m=[35m1000[0m


Epoch 2/30: 100%|██████████| 1000/1000 [00:10<00:00, 93.44it/s, loss=1.54, td_loss=0.000169, imitator_loss=1.54]

[2m2024-08-01 17:13.48[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171325: epoch=2 step=2000[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006779510974884033, 'time_algorithm_update': 0.009765063047409057, 'loss': 1.5436536977291107, 'td_loss': 0.0001684324908292183, 'imitator_loss': 1.5434852641820909, 'time_step': 0.01059448003768921}[0m [36mstep[0m=[35m2000[0m



Epoch 3/30: 100%|██████████| 1000/1000 [00:11<00:00, 85.55it/s, loss=1.53, td_loss=8.91e-5, imitator_loss=1.53]

[2m2024-08-01 17:13.59[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171325: epoch=3 step=3000[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007343344688415527, 'time_algorithm_update': 0.010695823669433594, 'loss': 1.5331668812036514, 'td_loss': 8.904706326393352e-05, 'imitator_loss': 1.5330778334140778, 'time_step': 0.01157896900177002}[0m [36mstep[0m=[35m3000[0m



Epoch 4/30: 100%|██████████| 1000/1000 [00:11<00:00, 87.33it/s, loss=1.53, td_loss=8.41e-5, imitator_loss=1.53]


[2m2024-08-01 17:14.11[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171325: epoch=4 step=4000[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007771065235137939, 'time_algorithm_update': 0.010371718406677246, 'loss': 1.5285498752593993, 'td_loss': 8.402159553497768e-05, 'imitator_loss': 1.5284658535718918, 'time_step': 0.011315882205963135}[0m [36mstep[0m=[35m4000[0m


Epoch 5/30: 100%|██████████| 1000/1000 [00:11<00:00, 88.70it/s, loss=1.53, td_loss=7.46e-5, imitator_loss=1.53]


[2m2024-08-01 17:14.22[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171325: epoch=5 step=5000[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007494487762451172, 'time_algorithm_update': 0.010235900163650512, 'loss': 1.5257215517759324, 'td_loss': 7.474019485925965e-05, 'imitator_loss': 1.525646812915802, 'time_step': 0.011146781921386718}[0m [36mstep[0m=[35m5000[0m


Epoch 6/30: 100%|██████████| 1000/1000 [00:13<00:00, 76.30it/s, loss=1.52, td_loss=6.87e-5, imitator_loss=1.52]


[2m2024-08-01 17:14.36[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171325: epoch=6 step=6000[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008464970588684082, 'time_algorithm_update': 0.011898935317993164, 'loss': 1.5235935052633285, 'td_loss': 6.865375238430715e-05, 'imitator_loss': 1.5235248523950577, 'time_step': 0.012934746980667114}[0m [36mstep[0m=[35m6000[0m


Epoch 7/30: 100%|██████████| 1000/1000 [00:10<00:00, 95.03it/s, loss=1.52, td_loss=5.69e-5, imitator_loss=1.52]

[2m2024-08-01 17:14.46[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171325: epoch=7 step=7000[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006121373176574707, 'time_algorithm_update': 0.009660982370376587, 'loss': 1.5216260228157044, 'td_loss': 5.678969547443558e-05, 'imitator_loss': 1.521569233894348, 'time_step': 0.010424567937850952}[0m [36mstep[0m=[35m7000[0m



Epoch 8/30: 100%|██████████| 1000/1000 [00:12<00:00, 78.24it/s, loss=1.52, td_loss=5.14e-5, imitator_loss=1.52]


[2m2024-08-01 17:14.59[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171325: epoch=8 step=8000[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0008168728351593018, 'time_algorithm_update': 0.011666812419891358, 'loss': 1.5219447321891784, 'td_loss': 5.1489740813849495e-05, 'imitator_loss': 1.5218932430744172, 'time_step': 0.012649059772491455}[0m [36mstep[0m=[35m8000[0m


Epoch 9/30: 100%|██████████| 1000/1000 [00:08<00:00, 111.38it/s, loss=1.53, td_loss=0.00512, imitator_loss=1.52]


[2m2024-08-01 17:15.08[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171325: epoch=9 step=9000[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000605013370513916, 'time_algorithm_update': 0.008112207889556885, 'loss': 1.525615402698517, 'td_loss': 0.005076284622613457, 'imitator_loss': 1.5205391180515289, 'time_step': 0.008872632265090942}[0m [36mstep[0m=[35m9000[0m


Epoch 10/30: 100%|██████████| 1000/1000 [00:09<00:00, 101.41it/s, loss=1.52, td_loss=0.000185, imitator_loss=1.52]


[2m2024-08-01 17:15.18[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171325: epoch=10 step=10000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006788642406463623, 'time_algorithm_update': 0.008902727365493775, 'loss': 1.5199734435081482, 'td_loss': 0.0001842679554138158, 'imitator_loss': 1.5197891752719879, 'time_step': 0.00974434757232666}[0m [36mstep[0m=[35m10000[0m


Epoch 11/30: 100%|██████████| 1000/1000 [00:10<00:00, 94.52it/s, loss=1.52, td_loss=0.000116, imitator_loss=1.52]


[2m2024-08-01 17:15.29[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171325: epoch=11 step=11000[0m [36mepoch[0m=[35m11[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000693727970123291, 'time_algorithm_update': 0.009598152160644531, 'loss': 1.5196362136602402, 'td_loss': 0.00011642729181039613, 'imitator_loss': 1.519519785284996, 'time_step': 0.010454707860946655}[0m [36mstep[0m=[35m11000[0m


Epoch 12/30: 100%|██████████| 1000/1000 [00:11<00:00, 87.06it/s, loss=1.52, td_loss=0.000106, imitator_loss=1.52]

[2m2024-08-01 17:15.40[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171325: epoch=12 step=12000[0m [36mepoch[0m=[35m12[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007320616245269775, 'time_algorithm_update': 0.010466412782669068, 'loss': 1.518374607563019, 'td_loss': 0.00010591428050065588, 'imitator_loss': 1.5182686946392059, 'time_step': 0.011359986305236816}[0m [36mstep[0m=[35m12000[0m



Epoch 13/30: 100%|██████████| 1000/1000 [00:10<00:00, 91.94it/s, loss=1.52, td_loss=9.97e-5, imitator_loss=1.52] 

[2m2024-08-01 17:15.51[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171325: epoch=13 step=13000[0m [36mepoch[0m=[35m13[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006515097618103028, 'time_algorithm_update': 0.010005266666412353, 'loss': 1.5202984063625335, 'td_loss': 9.943082243808022e-05, 'imitator_loss': 1.5201989749670028, 'time_step': 0.010789695024490356}[0m [36mstep[0m=[35m13000[0m



Epoch 14/30: 100%|██████████| 1000/1000 [00:10<00:00, 96.95it/s, loss=1.52, td_loss=9.6e-5, imitator_loss=1.52] 

[2m2024-08-01 17:16.01[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171325: epoch=14 step=14000[0m [36mepoch[0m=[35m14[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006411499977111816, 'time_algorithm_update': 0.009463469982147216, 'loss': 1.518070632457733, 'td_loss': 9.579189826581568e-05, 'imitator_loss': 1.5179748383760452, 'time_step': 0.010232787609100341}[0m [36mstep[0m=[35m14000[0m



Epoch 15/30: 100%|██████████| 1000/1000 [00:09<00:00, 104.74it/s, loss=1.52, td_loss=9.9e-5, imitator_loss=1.52] 

[2m2024-08-01 17:16.11[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171325: epoch=15 step=15000[0m [36mepoch[0m=[35m15[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006122841835021973, 'time_algorithm_update': 0.008701038837432862, 'loss': 1.5181281484365463, 'td_loss': 9.902836744731758e-05, 'imitator_loss': 1.518029121041298, 'time_step': 0.00945347023010254}[0m [36mstep[0m=[35m15000[0m



Epoch 16/30: 100%|██████████| 1000/1000 [00:11<00:00, 88.58it/s, loss=1.52, td_loss=9.41e-5, imitator_loss=1.52] 

[2m2024-08-01 17:16.22[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171325: epoch=16 step=16000[0m [36mepoch[0m=[35m16[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007384729385375977, 'time_algorithm_update': 0.01024375605583191, 'loss': 1.5181856117248536, 'td_loss': 9.405232282733778e-05, 'imitator_loss': 1.518091559290886, 'time_step': 0.01115455174446106}[0m [36mstep[0m=[35m16000[0m



Epoch 17/30: 100%|██████████| 1000/1000 [00:09<00:00, 102.54it/s, loss=1.53, td_loss=0.0104, imitator_loss=1.52]

[2m2024-08-01 17:16.32[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171325: epoch=17 step=17000[0m [36mepoch[0m=[35m17[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006611287593841553, 'time_algorithm_update': 0.008827277421951295, 'loss': 1.5285537233352662, 'td_loss': 0.010546310100136907, 'imitator_loss': 1.51800741314888, 'time_step': 0.009644366264343262}[0m [36mstep[0m=[35m17000[0m



Epoch 18/30: 100%|██████████| 1000/1000 [00:10<00:00, 100.00it/s, loss=1.53, td_loss=0.0101, imitator_loss=1.52]


[2m2024-08-01 17:16.42[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171325: epoch=18 step=18000[0m [36mepoch[0m=[35m18[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006620333194732665, 'time_algorithm_update': 0.009069278001785279, 'loss': 1.527755955696106, 'td_loss': 0.010050943853872013, 'imitator_loss': 1.5177050129175187, 'time_step': 0.0098893723487854}[0m [36mstep[0m=[35m18000[0m


Epoch 19/30: 100%|██████████| 1000/1000 [00:10<00:00, 97.40it/s, loss=1.52, td_loss=0.00732, imitator_loss=1.52]

[2m2024-08-01 17:16.52[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171325: epoch=19 step=19000[0m [36mepoch[0m=[35m19[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006668164730072022, 'time_algorithm_update': 0.009317627906799316, 'loss': 1.5237712301015853, 'td_loss': 0.007265044500592921, 'imitator_loss': 1.5165061864852905, 'time_step': 0.010148755311965942}[0m [36mstep[0m=[35m19000[0m



Epoch 20/30: 100%|██████████| 1000/1000 [00:11<00:00, 86.26it/s, loss=1.52, td_loss=0.00419, imitator_loss=1.52]

[2m2024-08-01 17:17.04[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171325: epoch=20 step=20000[0m [36mepoch[0m=[35m20[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007451426982879639, 'time_algorithm_update': 0.010546978235244751, 'loss': 1.522196580171585, 'td_loss': 0.004164614260935196, 'imitator_loss': 1.518031967163086, 'time_step': 0.01145915412902832}[0m [36mstep[0m=[35m20000[0m



Epoch 21/30: 100%|██████████| 1000/1000 [00:09<00:00, 101.09it/s, loss=1.52, td_loss=0.00342, imitator_loss=1.52]

[2m2024-08-01 17:17.14[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171325: epoch=21 step=21000[0m [36mepoch[0m=[35m21[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007043502330780029, 'time_algorithm_update': 0.008938590049743652, 'loss': 1.521343690276146, 'td_loss': 0.003406433739750355, 'imitator_loss': 1.51793725669384, 'time_step': 0.009789429664611816}[0m [36mstep[0m=[35m21000[0m



Epoch 22/30: 100%|██████████| 1000/1000 [00:09<00:00, 102.50it/s, loss=1.52, td_loss=0.00306, imitator_loss=1.52]

[2m2024-08-01 17:17.23[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171325: epoch=22 step=22000[0m [36mepoch[0m=[35m22[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006830649375915527, 'time_algorithm_update': 0.008819451332092285, 'loss': 1.519437635421753, 'td_loss': 0.003045126422370231, 'imitator_loss': 1.5163925087451935, 'time_step': 0.009652873039245606}[0m [36mstep[0m=[35m22000[0m



Epoch 23/30: 100%|██████████| 1000/1000 [00:09<00:00, 101.50it/s, loss=1.53, td_loss=0.00644, imitator_loss=1.52]

[2m2024-08-01 17:17.33[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171325: epoch=23 step=23000[0m [36mepoch[0m=[35m23[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006935334205627441, 'time_algorithm_update': 0.008911036729812622, 'loss': 1.5252680854797362, 'td_loss': 0.006415641585223056, 'imitator_loss': 1.518852443933487, 'time_step': 0.009751085042953491}[0m [36mstep[0m=[35m23000[0m



Epoch 24/30: 100%|██████████| 1000/1000 [00:10<00:00, 98.59it/s, loss=1.52, td_loss=0.00249, imitator_loss=1.52]

[2m2024-08-01 17:17.44[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171325: epoch=24 step=24000[0m [36mepoch[0m=[35m24[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007035467624664307, 'time_algorithm_update': 0.009185542106628418, 'loss': 1.5206985602378844, 'td_loss': 0.0025610567525982332, 'imitator_loss': 1.518137503027916, 'time_step': 0.010037853240966796}[0m [36mstep[0m=[35m24000[0m



Epoch 25/30: 100%|██████████| 1000/1000 [00:11<00:00, 85.85it/s, loss=1.54, td_loss=0.0145, imitator_loss=1.52]

[2m2024-08-01 17:17.55[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171325: epoch=25 step=25000[0m [36mepoch[0m=[35m25[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007425892353057861, 'time_algorithm_update': 0.010592122554779053, 'loss': 1.5350480440855025, 'td_loss': 0.014449699653778226, 'imitator_loss': 1.5205983455181122, 'time_step': 0.011506356716156007}[0m [36mstep[0m=[35m25000[0m



Epoch 26/30: 100%|██████████| 1000/1000 [00:10<00:00, 92.51it/s, loss=1.53, td_loss=0.0145, imitator_loss=1.52] 

[2m2024-08-01 17:18.06[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171325: epoch=26 step=26000[0m [36mepoch[0m=[35m26[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006957893371582031, 'time_algorithm_update': 0.00982526731491089, 'loss': 1.532204368829727, 'td_loss': 0.014411600865249057, 'imitator_loss': 1.5177927677631378, 'time_step': 0.01068530774116516}[0m [36mstep[0m=[35m26000[0m



Epoch 27/30: 100%|██████████| 1000/1000 [00:11<00:00, 83.80it/s, loss=1.54, td_loss=0.0182, imitator_loss=1.52]

[2m2024-08-01 17:18.18[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171325: epoch=27 step=27000[0m [36mepoch[0m=[35m27[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007550785541534424, 'time_algorithm_update': 0.010862309455871582, 'loss': 1.5351985039710998, 'td_loss': 0.01813885426451452, 'imitator_loss': 1.5170596514940262, 'time_step': 0.011790106296539307}[0m [36mstep[0m=[35m27000[0m



Epoch 28/30: 100%|██████████| 1000/1000 [00:11<00:00, 90.43it/s, loss=1.54, td_loss=0.0191, imitator_loss=1.52]

[2m2024-08-01 17:18.29[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171325: epoch=28 step=28000[0m [36mepoch[0m=[35m28[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007143120765686035, 'time_algorithm_update': 0.010042672634124755, 'loss': 1.5369420208930968, 'td_loss': 0.019056932472041807, 'imitator_loss': 1.5178850882053376, 'time_step': 0.010924303770065308}[0m [36mstep[0m=[35m28000[0m



Epoch 29/30: 100%|██████████| 1000/1000 [00:11<00:00, 88.62it/s, loss=1.54, td_loss=0.0228, imitator_loss=1.52]


[2m2024-08-01 17:18.40[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171325: epoch=29 step=29000[0m [36mepoch[0m=[35m29[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007253367900848388, 'time_algorithm_update': 0.010260838031768799, 'loss': 1.5413121274709702, 'td_loss': 0.02287735157366842, 'imitator_loss': 1.5184347749948501, 'time_step': 0.011152347803115845}[0m [36mstep[0m=[35m29000[0m


Epoch 30/30: 100%|██████████| 1000/1000 [00:12<00:00, 83.01it/s, loss=1.54, td_loss=0.0225, imitator_loss=1.52]


[2m2024-08-01 17:18.52[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171325: epoch=30 step=30000[0m [36mepoch[0m=[35m30[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007588326930999756, 'time_algorithm_update': 0.01097082233428955, 'loss': 1.5422054283618927, 'td_loss': 0.02261128898570314, 'imitator_loss': 1.5195941392183303, 'time_step': 0.01190296483039856}[0m [36mstep[0m=[35m30000[0m
200
[2m2024-08-01 17:18.53[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('uint8')], shape=[(3, 7, 7)]), action_signature=Signature(dtype=[dtype('int64')], shape=[(1,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.DISCRETE: 2>, action_size=3)[0m
[2m2024-08-01 17:18.53[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/DiscreteBCQ_20240801171853[0m
[2m2024-08-01 17:18.53[0m [[32m[1mdebug    [0m] [1mBu

Epoch 1/30: 100%|██████████| 1000/1000 [00:10<00:00, 95.25it/s, loss=1.58, td_loss=0.00973, imitator_loss=1.57]

[2m2024-08-01 17:19.03[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171853: epoch=1 step=1000[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006880397796630859, 'time_algorithm_update': 0.009538432121276856, 'loss': 1.581821411371231, 'td_loss': 0.009650653146658443, 'imitator_loss': 1.572170758843422, 'time_step': 0.010383424758911132}[0m [36mstep[0m=[35m1000[0m



Epoch 2/30: 100%|██████████| 1000/1000 [00:10<00:00, 92.00it/s, loss=1.53, td_loss=0.000275, imitator_loss=1.53]

[2m2024-08-01 17:19.14[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171853: epoch=2 step=2000[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007125236988067627, 'time_algorithm_update': 0.00987327218055725, 'loss': 1.5296312190294266, 'td_loss': 0.00027496952050569233, 'imitator_loss': 1.5293562504053115, 'time_step': 0.010746612787246704}[0m [36mstep[0m=[35m2000[0m



Epoch 3/30: 100%|██████████| 1000/1000 [00:11<00:00, 86.52it/s, loss=1.53, td_loss=0.000179, imitator_loss=1.53]

[2m2024-08-01 17:19.26[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171853: epoch=3 step=3000[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007535324096679687, 'time_algorithm_update': 0.010493396282196045, 'loss': 1.5259132860898972, 'td_loss': 0.00017844949070786243, 'imitator_loss': 1.5257348372936248, 'time_step': 0.011418023824691773}[0m [36mstep[0m=[35m3000[0m



Epoch 4/30: 100%|██████████| 1000/1000 [00:11<00:00, 86.68it/s, loss=1.52, td_loss=0.000136, imitator_loss=1.52]

[2m2024-08-01 17:19.37[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171853: epoch=4 step=4000[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007438199520111083, 'time_algorithm_update': 0.01048196244239807, 'loss': 1.5238760406970977, 'td_loss': 0.00013636799046071247, 'imitator_loss': 1.523739672780037, 'time_step': 0.011397003173828125}[0m [36mstep[0m=[35m4000[0m



Epoch 5/30: 100%|██████████| 1000/1000 [00:12<00:00, 81.87it/s, loss=1.52, td_loss=0.00011, imitator_loss=1.52] 

[2m2024-08-01 17:19.50[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171853: epoch=5 step=5000[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007837784290313721, 'time_algorithm_update': 0.011100634098052979, 'loss': 1.5208748897314073, 'td_loss': 0.00011002544002985815, 'imitator_loss': 1.520764865040779, 'time_step': 0.012064292907714843}[0m [36mstep[0m=[35m5000[0m



Epoch 6/30: 100%|██████████| 1000/1000 [00:12<00:00, 82.82it/s, loss=1.52, td_loss=0.000103, imitator_loss=1.52]

[2m2024-08-01 17:20.02[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171853: epoch=6 step=6000[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007662117481231689, 'time_algorithm_update': 0.010994030475616454, 'loss': 1.5216159071922302, 'td_loss': 0.00010323380806403293, 'imitator_loss': 1.5215126737356186, 'time_step': 0.011932656764984131}[0m [36mstep[0m=[35m6000[0m



Epoch 7/30: 100%|██████████| 1000/1000 [00:11<00:00, 84.62it/s, loss=1.52, td_loss=9.94e-5, imitator_loss=1.52]

[2m2024-08-01 17:20.14[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171853: epoch=7 step=7000[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007509267330169677, 'time_algorithm_update': 0.01075672483444214, 'loss': 1.5207163103818893, 'td_loss': 9.910413532270468e-05, 'imitator_loss': 1.5206172077655793, 'time_step': 0.011677678346633911}[0m [36mstep[0m=[35m7000[0m



Epoch 8/30: 100%|██████████| 1000/1000 [00:11<00:00, 84.17it/s, loss=1.52, td_loss=8.95e-5, imitator_loss=1.52]

[2m2024-08-01 17:20.25[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171853: epoch=8 step=8000[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007647466659545898, 'time_algorithm_update': 0.010792073011398316, 'loss': 1.5213508149385453, 'td_loss': 8.958727368190011e-05, 'imitator_loss': 1.5212612266540528, 'time_step': 0.011734369039535522}[0m [36mstep[0m=[35m8000[0m



Epoch 9/30: 100%|██████████| 1000/1000 [00:10<00:00, 92.06it/s, loss=1.53, td_loss=0.00625, imitator_loss=1.52]


[2m2024-08-01 17:20.36[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171853: epoch=9 step=9000[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007056329250335694, 'time_algorithm_update': 0.009860259771347047, 'loss': 1.5280740765333176, 'td_loss': 0.006196199069810973, 'imitator_loss': 1.5218778792619705, 'time_step': 0.010733523607254028}[0m [36mstep[0m=[35m9000[0m


Epoch 10/30: 100%|██████████| 1000/1000 [00:11<00:00, 89.34it/s, loss=1.52, td_loss=0.00029, imitator_loss=1.52] 

[2m2024-08-01 17:20.47[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171853: epoch=10 step=10000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000725844144821167, 'time_algorithm_update': 0.010160749435424804, 'loss': 1.5211098309755324, 'td_loss': 0.0002893186003420851, 'imitator_loss': 1.5208205115795135, 'time_step': 0.011059768199920654}[0m [36mstep[0m=[35m10000[0m



Epoch 11/30: 100%|██████████| 1000/1000 [00:11<00:00, 83.82it/s, loss=1.52, td_loss=0.000155, imitator_loss=1.52]

[2m2024-08-01 17:20.59[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171853: epoch=11 step=11000[0m [36mepoch[0m=[35m11[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007624249458312989, 'time_algorithm_update': 0.010846126556396484, 'loss': 1.5209031711816787, 'td_loss': 0.0001542269018555089, 'imitator_loss': 1.5207489442825317, 'time_step': 0.011780815601348877}[0m [36mstep[0m=[35m11000[0m



Epoch 12/30: 100%|██████████| 1000/1000 [00:11<00:00, 88.54it/s, loss=1.52, td_loss=0.000115, imitator_loss=1.52]

[2m2024-08-01 17:21.11[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171853: epoch=12 step=12000[0m [36mepoch[0m=[35m12[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007285387516021729, 'time_algorithm_update': 0.01026133632659912, 'loss': 1.5208669757843019, 'td_loss': 0.00011454493279779854, 'imitator_loss': 1.520752431511879, 'time_step': 0.011156904935836792}[0m [36mstep[0m=[35m12000[0m



Epoch 13/30: 100%|██████████| 1000/1000 [00:12<00:00, 81.28it/s, loss=1.52, td_loss=9.18e-5, imitator_loss=1.52]

[2m2024-08-01 17:21.23[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171853: epoch=13 step=13000[0m [36mepoch[0m=[35m13[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007907109260559082, 'time_algorithm_update': 0.011192939519882202, 'loss': 1.5201497166156768, 'td_loss': 9.183442187895708e-05, 'imitator_loss': 1.5200578838586807, 'time_step': 0.012154770135879516}[0m [36mstep[0m=[35m13000[0m



Epoch 14/30: 100%|██████████| 1000/1000 [00:11<00:00, 83.97it/s, loss=1.52, td_loss=7.85e-5, imitator_loss=1.52]

[2m2024-08-01 17:21.35[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171853: epoch=14 step=14000[0m [36mepoch[0m=[35m14[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007652807235717774, 'time_algorithm_update': 0.010823716402053834, 'loss': 1.524794725060463, 'td_loss': 7.838566921054734e-05, 'imitator_loss': 1.5247163395881653, 'time_step': 0.011762709379196167}[0m [36mstep[0m=[35m14000[0m



Epoch 15/30: 100%|██████████| 1000/1000 [00:12<00:00, 82.71it/s, loss=1.52, td_loss=6.76e-5, imitator_loss=1.52]


[2m2024-08-01 17:21.47[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171853: epoch=15 step=15000[0m [36mepoch[0m=[35m15[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000772348403930664, 'time_algorithm_update': 0.011004358291625977, 'loss': 1.519013570189476, 'td_loss': 6.748398129184352e-05, 'imitator_loss': 1.518946086168289, 'time_step': 0.011946398258209229}[0m [36mstep[0m=[35m15000[0m


Epoch 16/30: 100%|██████████| 1000/1000 [00:09<00:00, 104.38it/s, loss=1.52, td_loss=6.96e-5, imitator_loss=1.52]

[2m2024-08-01 17:21.57[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171853: epoch=16 step=16000[0m [36mepoch[0m=[35m16[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006426057815551758, 'time_algorithm_update': 0.008670830488204957, 'loss': 1.5213769664764405, 'td_loss': 6.951900616877538e-05, 'imitator_loss': 1.5213074468374252, 'time_step': 0.009470850944519042}[0m [36mstep[0m=[35m16000[0m



Epoch 17/30: 100%|██████████| 1000/1000 [00:10<00:00, 94.68it/s, loss=1.54, td_loss=0.0152, imitator_loss=1.52]

[2m2024-08-01 17:22.07[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171853: epoch=17 step=17000[0m [36mepoch[0m=[35m17[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006873781681060791, 'time_algorithm_update': 0.009588017225265503, 'loss': 1.5356293486356736, 'td_loss': 0.0153361520739345, 'imitator_loss': 1.5202931962013244, 'time_step': 0.010438385486602784}[0m [36mstep[0m=[35m17000[0m



Epoch 18/30: 100%|██████████| 1000/1000 [00:11<00:00, 85.38it/s, loss=1.54, td_loss=0.0168, imitator_loss=1.52]

[2m2024-08-01 17:22.19[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171853: epoch=18 step=18000[0m [36mepoch[0m=[35m18[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007438766956329346, 'time_algorithm_update': 0.010664394855499268, 'loss': 1.5395791670084, 'td_loss': 0.016966652135481125, 'imitator_loss': 1.5226125143766402, 'time_step': 0.01157449197769165}[0m [36mstep[0m=[35m18000[0m



Epoch 19/30: 100%|██████████| 1000/1000 [00:11<00:00, 86.01it/s, loss=1.55, td_loss=0.0243, imitator_loss=1.52]

[2m2024-08-01 17:22.31[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171853: epoch=19 step=19000[0m [36mepoch[0m=[35m19[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007500517368316651, 'time_algorithm_update': 0.010564292669296265, 'loss': 1.5480785024166108, 'td_loss': 0.02439263438800117, 'imitator_loss': 1.5236858687400818, 'time_step': 0.01148522162437439}[0m [36mstep[0m=[35m19000[0m



Epoch 20/30: 100%|██████████| 1000/1000 [00:10<00:00, 98.92it/s, loss=1.55, td_loss=0.0243, imitator_loss=1.52]

[2m2024-08-01 17:22.41[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171853: epoch=20 step=20000[0m [36mepoch[0m=[35m20[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006774780750274658, 'time_algorithm_update': 0.009164044141769408, 'loss': 1.5465687568187714, 'td_loss': 0.02412061457929667, 'imitator_loss': 1.5224481397867202, 'time_step': 0.009997349739074707}[0m [36mstep[0m=[35m20000[0m



Epoch 21/30: 100%|██████████| 1000/1000 [00:10<00:00, 99.06it/s, loss=1.54, td_loss=0.02, imitator_loss=1.52]  


[2m2024-08-01 17:22.51[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171853: epoch=21 step=21000[0m [36mepoch[0m=[35m21[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006704392433166504, 'time_algorithm_update': 0.009148982048034668, 'loss': 1.5423232569694518, 'td_loss': 0.019850269101676532, 'imitator_loss': 1.5224729868173599, 'time_step': 0.00998175048828125}[0m [36mstep[0m=[35m21000[0m


Epoch 22/30: 100%|██████████| 1000/1000 [00:10<00:00, 93.34it/s, loss=1.54, td_loss=0.019, imitator_loss=1.52] 

[2m2024-08-01 17:23.02[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171853: epoch=22 step=22000[0m [36mepoch[0m=[35m22[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000700188159942627, 'time_algorithm_update': 0.009730619668960572, 'loss': 1.5403290226459503, 'td_loss': 0.01912332924688235, 'imitator_loss': 1.5212056938409806, 'time_step': 0.010592578411102295}[0m [36mstep[0m=[35m22000[0m



Epoch 23/30: 100%|██████████| 1000/1000 [00:10<00:00, 98.19it/s, loss=1.54, td_loss=0.0219, imitator_loss=1.52]

[2m2024-08-01 17:23.12[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171853: epoch=23 step=23000[0m [36mepoch[0m=[35m23[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006724228858947754, 'time_algorithm_update': 0.00922806477546692, 'loss': 1.5409573222398758, 'td_loss': 0.0218866141110193, 'imitator_loss': 1.519070708990097, 'time_step': 0.010063422918319702}[0m [36mstep[0m=[35m23000[0m



Epoch 24/30: 100%|██████████| 1000/1000 [00:10<00:00, 93.58it/s, loss=1.54, td_loss=0.0177, imitator_loss=1.52]

[2m2024-08-01 17:23.22[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171853: epoch=24 step=24000[0m [36mepoch[0m=[35m24[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006998541355133056, 'time_algorithm_update': 0.009690378189086914, 'loss': 1.539437554359436, 'td_loss': 0.01782327229541261, 'imitator_loss': 1.5216142823696137, 'time_step': 0.01055682396888733}[0m [36mstep[0m=[35m24000[0m



Epoch 25/30: 100%|██████████| 1000/1000 [00:10<00:00, 99.78it/s, loss=1.54, td_loss=0.016, imitator_loss=1.52] 

[2m2024-08-01 17:23.33[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171853: epoch=25 step=25000[0m [36mepoch[0m=[35m25[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006617755889892578, 'time_algorithm_update': 0.009084688901901245, 'loss': 1.5357601039409638, 'td_loss': 0.015944082117639483, 'imitator_loss': 1.5198160213232041, 'time_step': 0.009905765771865844}[0m [36mstep[0m=[35m25000[0m



Epoch 26/30: 100%|██████████| 1000/1000 [00:08<00:00, 112.41it/s, loss=1.53, td_loss=0.00982, imitator_loss=1.52]


[2m2024-08-01 17:23.41[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171853: epoch=26 step=26000[0m [36mepoch[0m=[35m26[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006039924621582031, 'time_algorithm_update': 0.00804005265235901, 'loss': 1.5282588478326797, 'td_loss': 0.009824599271407351, 'imitator_loss': 1.5184342486858369, 'time_step': 0.008797332286834717}[0m [36mstep[0m=[35m26000[0m


Epoch 27/30: 100%|██████████| 1000/1000 [00:11<00:00, 85.01it/s, loss=1.53, td_loss=0.00996, imitator_loss=1.52]

[2m2024-08-01 17:23.53[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171853: epoch=27 step=27000[0m [36mepoch[0m=[35m27[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007531566619873047, 'time_algorithm_update': 0.010692505598068237, 'loss': 1.531001994729042, 'td_loss': 0.009924454159568994, 'imitator_loss': 1.5210775393247604, 'time_step': 0.01161906909942627}[0m [36mstep[0m=[35m27000[0m



Epoch 28/30: 100%|██████████| 1000/1000 [00:12<00:00, 83.19it/s, loss=1.53, td_loss=0.00999, imitator_loss=1.52]

[2m2024-08-01 17:24.05[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171853: epoch=28 step=28000[0m [36mepoch[0m=[35m28[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007666785717010498, 'time_algorithm_update': 0.010920337915420531, 'loss': 1.52962699174881, 'td_loss': 0.010011309251975036, 'imitator_loss': 1.5196156817674638, 'time_step': 0.011869071245193482}[0m [36mstep[0m=[35m28000[0m



Epoch 29/30: 100%|██████████| 1000/1000 [00:11<00:00, 89.08it/s, loss=1.53, td_loss=0.00989, imitator_loss=1.52]

[2m2024-08-01 17:24.16[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171853: epoch=29 step=29000[0m [36mepoch[0m=[35m29[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.000730790376663208, 'time_algorithm_update': 0.010186588048934936, 'loss': 1.5295632421970367, 'td_loss': 0.009902868724209838, 'imitator_loss': 1.5196603738069534, 'time_step': 0.011087494611740112}[0m [36mstep[0m=[35m29000[0m



Epoch 30/30: 100%|██████████| 1000/1000 [00:11<00:00, 84.67it/s, loss=1.53, td_loss=0.0095, imitator_loss=1.52] 


[2m2024-08-01 17:24.28[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801171853: epoch=30 step=30000[0m [36mepoch[0m=[35m30[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007686233520507813, 'time_algorithm_update': 0.01073008632659912, 'loss': 1.529637935757637, 'td_loss': 0.009491081041713187, 'imitator_loss': 1.5201468546390533, 'time_step': 0.0116689555644989}[0m [36mstep[0m=[35m30000[0m
400
[2m2024-08-01 17:24.29[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('uint8')], shape=[(3, 7, 7)]), action_signature=Signature(dtype=[dtype('int64')], shape=[(1,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.DISCRETE: 2>, action_size=3)[0m
[2m2024-08-01 17:24.29[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/DiscreteBCQ_20240801172429[0m
[2m2024-08-01 17:24.29[0m [[32m[1mdebug    [0m] [1mBui

Epoch 1/30: 100%|██████████| 1000/1000 [00:11<00:00, 88.99it/s, loss=1.61, td_loss=0.0165, imitator_loss=1.6] 

[2m2024-08-01 17:24.40[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801172429: epoch=1 step=1000[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007362256050109864, 'time_algorithm_update': 0.01019239044189453, 'loss': 1.6133547407388686, 'td_loss': 0.01635269936081022, 'imitator_loss': 1.5970020421743394, 'time_step': 0.011102132320404053}[0m [36mstep[0m=[35m1000[0m



Epoch 2/30: 100%|██████████| 1000/1000 [00:09<00:00, 100.56it/s, loss=1.56, td_loss=0.000434, imitator_loss=1.56]

[2m2024-08-01 17:24.50[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801172429: epoch=2 step=2000[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006617038249969483, 'time_algorithm_update': 0.009023486614227295, 'loss': 1.557901739835739, 'td_loss': 0.0004339771242957795, 'imitator_loss': 1.5574677646160127, 'time_step': 0.00984020233154297}[0m [36mstep[0m=[35m2000[0m



Epoch 3/30: 100%|██████████| 1000/1000 [00:11<00:00, 86.97it/s, loss=1.55, td_loss=0.000365, imitator_loss=1.54]

[2m2024-08-01 17:25.02[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801172429: epoch=3 step=3000[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007456459999084472, 'time_algorithm_update': 0.01044600510597229, 'loss': 1.5448678520917893, 'td_loss': 0.00036687672769767233, 'imitator_loss': 1.5445009759664536, 'time_step': 0.011361970424652099}[0m [36mstep[0m=[35m3000[0m



Epoch 4/30: 100%|██████████| 1000/1000 [00:10<00:00, 95.09it/s, loss=1.53, td_loss=0.000305, imitator_loss=1.53]

[2m2024-08-01 17:25.12[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801172429: epoch=4 step=4000[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006946663856506347, 'time_algorithm_update': 0.009539110898971557, 'loss': 1.5339882597923278, 'td_loss': 0.000304878220835235, 'imitator_loss': 1.5336833822727203, 'time_step': 0.010396511316299439}[0m [36mstep[0m=[35m4000[0m



Epoch 5/30: 100%|██████████| 1000/1000 [00:07<00:00, 127.53it/s, loss=1.53, td_loss=0.000284, imitator_loss=1.53]

[2m2024-08-01 17:25.20[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801172429: epoch=5 step=5000[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0005517499446868896, 'time_algorithm_update': 0.007058564901351929, 'loss': 1.5297483850717544, 'td_loss': 0.0002851571885003068, 'imitator_loss': 1.529463227391243, 'time_step': 0.007753434896469116}[0m [36mstep[0m=[35m5000[0m



Epoch 6/30: 100%|██████████| 1000/1000 [00:12<00:00, 81.27it/s, loss=1.53, td_loss=0.000304, imitator_loss=1.53]

[2m2024-08-01 17:25.32[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801172429: epoch=6 step=6000[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007916190624237061, 'time_algorithm_update': 0.011175716876983642, 'loss': 1.5262661488056184, 'td_loss': 0.0003035705429210793, 'imitator_loss': 1.5259625784158706, 'time_step': 0.012148762702941894}[0m [36mstep[0m=[35m6000[0m



Epoch 7/30: 100%|██████████| 1000/1000 [00:09<00:00, 101.19it/s, loss=1.53, td_loss=0.000241, imitator_loss=1.53]

[2m2024-08-01 17:25.42[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801172429: epoch=7 step=7000[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006565699577331543, 'time_algorithm_update': 0.008955524444580079, 'loss': 1.5255383758544923, 'td_loss': 0.00023980815152754075, 'imitator_loss': 1.525298568367958, 'time_step': 0.009768450498580933}[0m [36mstep[0m=[35m7000[0m



Epoch 8/30: 100%|██████████| 1000/1000 [00:10<00:00, 94.55it/s, loss=1.53, td_loss=0.000213, imitator_loss=1.53]

[2m2024-08-01 17:25.53[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801172429: epoch=8 step=8000[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006968319416046142, 'time_algorithm_update': 0.009590409278869628, 'loss': 1.5270041819810867, 'td_loss': 0.00021255770736388512, 'imitator_loss': 1.5267916256189347, 'time_step': 0.01045454454421997}[0m [36mstep[0m=[35m8000[0m



Epoch 9/30: 100%|██████████| 1000/1000 [00:09<00:00, 102.99it/s, loss=1.53, td_loss=0.00777, imitator_loss=1.53]

[2m2024-08-01 17:26.02[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801172429: epoch=9 step=9000[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006537721157073975, 'time_algorithm_update': 0.008803651571273803, 'loss': 1.5328575805425644, 'td_loss': 0.0077077739802771245, 'imitator_loss': 1.5251498053073882, 'time_step': 0.009608511924743652}[0m [36mstep[0m=[35m9000[0m



Epoch 10/30: 100%|██████████| 1000/1000 [00:11<00:00, 89.46it/s, loss=1.52, td_loss=0.00032, imitator_loss=1.52] 

[2m2024-08-01 17:26.14[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801172429: epoch=10 step=10000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007278752326965332, 'time_algorithm_update': 0.010149906635284424, 'loss': 1.5244915879964829, 'td_loss': 0.00031823461365274855, 'imitator_loss': 1.5241733531951904, 'time_step': 0.01104697036743164}[0m [36mstep[0m=[35m10000[0m



Epoch 11/30: 100%|██████████| 1000/1000 [00:11<00:00, 87.37it/s, loss=1.52, td_loss=0.000194, imitator_loss=1.52]


[2m2024-08-01 17:26.25[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801172429: epoch=11 step=11000[0m [36mepoch[0m=[35m11[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007324728965759277, 'time_algorithm_update': 0.010407989740371704, 'loss': 1.5235212024450302, 'td_loss': 0.00019469642800868314, 'imitator_loss': 1.523326506972313, 'time_step': 0.011310688734054565}[0m [36mstep[0m=[35m11000[0m


Epoch 12/30: 100%|██████████| 1000/1000 [00:11<00:00, 85.37it/s, loss=1.52, td_loss=0.000151, imitator_loss=1.52]

[2m2024-08-01 17:26.37[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801172429: epoch=12 step=12000[0m [36mepoch[0m=[35m12[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007514798641204834, 'time_algorithm_update': 0.010647378206253052, 'loss': 1.5239599976539613, 'td_loss': 0.0001502851494551578, 'imitator_loss': 1.5238097116947174, 'time_step': 0.011571789741516113}[0m [36mstep[0m=[35m12000[0m



Epoch 13/30: 100%|██████████| 1000/1000 [00:12<00:00, 80.75it/s, loss=1.53, td_loss=0.000136, imitator_loss=1.53]

[2m2024-08-01 17:26.49[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801172429: epoch=13 step=13000[0m [36mepoch[0m=[35m13[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007861130237579346, 'time_algorithm_update': 0.011265142440795899, 'loss': 1.52508629655838, 'td_loss': 0.00013567565927587565, 'imitator_loss': 1.5249506205320358, 'time_step': 0.012228601932525635}[0m [36mstep[0m=[35m13000[0m



Epoch 14/30: 100%|██████████| 1000/1000 [00:10<00:00, 91.54it/s, loss=1.53, td_loss=0.000124, imitator_loss=1.53]

[2m2024-08-01 17:27.00[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801172429: epoch=14 step=14000[0m [36mepoch[0m=[35m14[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007130892276763916, 'time_algorithm_update': 0.009910702943801879, 'loss': 1.52605856859684, 'td_loss': 0.00012344948710779136, 'imitator_loss': 1.525935119509697, 'time_step': 0.010791752099990845}[0m [36mstep[0m=[35m14000[0m



Epoch 15/30: 100%|██████████| 1000/1000 [00:11<00:00, 87.25it/s, loss=1.53, td_loss=0.000113, imitator_loss=1.53]

[2m2024-08-01 17:27.12[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801172429: epoch=15 step=15000[0m [36mepoch[0m=[35m15[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007346413135528565, 'time_algorithm_update': 0.010428208827972412, 'loss': 1.5255832442045212, 'td_loss': 0.00011339533655882406, 'imitator_loss': 1.525469847202301, 'time_step': 0.011329205751419068}[0m [36mstep[0m=[35m15000[0m



Epoch 16/30: 100%|██████████| 1000/1000 [00:10<00:00, 98.91it/s, loss=1.52, td_loss=0.000118, imitator_loss=1.52]

[2m2024-08-01 17:27.22[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801172429: epoch=16 step=16000[0m [36mepoch[0m=[35m16[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006777870655059814, 'time_algorithm_update': 0.009157747268676757, 'loss': 1.5244495396614075, 'td_loss': 0.00011818423860313487, 'imitator_loss': 1.5243313547372819, 'time_step': 0.00999554705619812}[0m [36mstep[0m=[35m16000[0m



Epoch 17/30: 100%|██████████| 1000/1000 [00:11<00:00, 85.48it/s, loss=1.55, td_loss=0.0194, imitator_loss=1.53]

[2m2024-08-01 17:27.33[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801172429: epoch=17 step=17000[0m [36mepoch[0m=[35m17[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007506980895996094, 'time_algorithm_update': 0.010637080192565918, 'loss': 1.545534065246582, 'td_loss': 0.01943461459045284, 'imitator_loss': 1.526099449157715, 'time_step': 0.011556650161743164}[0m [36mstep[0m=[35m17000[0m



Epoch 18/30: 100%|██████████| 1000/1000 [00:10<00:00, 92.13it/s, loss=1.54, td_loss=0.0209, imitator_loss=1.52]

[2m2024-08-01 17:27.44[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801172429: epoch=18 step=18000[0m [36mepoch[0m=[35m18[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007105824947357177, 'time_algorithm_update': 0.009842865943908692, 'loss': 1.543579393386841, 'td_loss': 0.020990462765621488, 'imitator_loss': 1.5225889303684235, 'time_step': 0.010721226692199708}[0m [36mstep[0m=[35m18000[0m



Epoch 19/30: 100%|██████████| 1000/1000 [00:10<00:00, 94.83it/s, loss=1.54, td_loss=0.0208, imitator_loss=1.52]

[2m2024-08-01 17:27.55[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801172429: epoch=19 step=19000[0m [36mepoch[0m=[35m19[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006940493583679199, 'time_algorithm_update': 0.009567210435867309, 'loss': 1.5451057995557784, 'td_loss': 0.020799904805608095, 'imitator_loss': 1.5243058952093125, 'time_step': 0.010424176454544068}[0m [36mstep[0m=[35m19000[0m



Epoch 20/30: 100%|██████████| 1000/1000 [00:11<00:00, 88.84it/s, loss=1.55, td_loss=0.0272, imitator_loss=1.52]

[2m2024-08-01 17:28.06[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801172429: epoch=20 step=20000[0m [36mepoch[0m=[35m20[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007372374534606933, 'time_algorithm_update': 0.010222811937332153, 'loss': 1.550513698220253, 'td_loss': 0.027039564071164934, 'imitator_loss': 1.5234741352796555, 'time_step': 0.011127135753631591}[0m [36mstep[0m=[35m20000[0m



Epoch 21/30: 100%|██████████| 1000/1000 [00:09<00:00, 101.04it/s, loss=1.54, td_loss=0.0177, imitator_loss=1.53]

[2m2024-08-01 17:28.16[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801172429: epoch=21 step=21000[0m [36mepoch[0m=[35m21[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006558718681335449, 'time_algorithm_update': 0.008965489864349365, 'loss': 1.5432053513526915, 'td_loss': 0.01754939749708865, 'imitator_loss': 1.525655954360962, 'time_step': 0.00978006362915039}[0m [36mstep[0m=[35m21000[0m



Epoch 22/30: 100%|██████████| 1000/1000 [00:10<00:00, 92.79it/s, loss=1.55, td_loss=0.0223, imitator_loss=1.53] 

[2m2024-08-01 17:28.27[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801172429: epoch=22 step=22000[0m [36mepoch[0m=[35m22[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007085540294647217, 'time_algorithm_update': 0.009775044202804565, 'loss': 1.5478443684577943, 'td_loss': 0.022293764881265814, 'imitator_loss': 1.5255506055355073, 'time_step': 0.010651329040527343}[0m [36mstep[0m=[35m22000[0m



Epoch 23/30: 100%|██████████| 1000/1000 [00:11<00:00, 88.09it/s, loss=1.55, td_loss=0.0218, imitator_loss=1.52]


[2m2024-08-01 17:28.38[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801172429: epoch=23 step=23000[0m [36mepoch[0m=[35m23[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007376503944396972, 'time_algorithm_update': 0.010319005012512207, 'loss': 1.5449592661857605, 'td_loss': 0.02179303895798512, 'imitator_loss': 1.5231662274599076, 'time_step': 0.011221806526184082}[0m [36mstep[0m=[35m23000[0m


Epoch 24/30: 100%|██████████| 1000/1000 [00:10<00:00, 95.36it/s, loss=1.55, td_loss=0.0228, imitator_loss=1.52]

[2m2024-08-01 17:28.49[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801172429: epoch=24 step=24000[0m [36mepoch[0m=[35m24[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006895267963409424, 'time_algorithm_update': 0.009514139890670776, 'loss': 1.5470515998601913, 'td_loss': 0.0227494147066609, 'imitator_loss': 1.5243021844625473, 'time_step': 0.010366205930709839}[0m [36mstep[0m=[35m24000[0m



Epoch 25/30: 100%|██████████| 1000/1000 [00:10<00:00, 97.66it/s, loss=1.54, td_loss=0.0196, imitator_loss=1.52]


[2m2024-08-01 17:28.59[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801172429: epoch=25 step=25000[0m [36mepoch[0m=[35m25[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006842608451843262, 'time_algorithm_update': 0.009276899576187134, 'loss': 1.5415381259918213, 'td_loss': 0.01952256157435477, 'imitator_loss': 1.5220155640840531, 'time_step': 0.010121319055557252}[0m [36mstep[0m=[35m25000[0m


Epoch 26/30: 100%|██████████| 1000/1000 [00:09<00:00, 100.35it/s, loss=1.53, td_loss=0.0114, imitator_loss=1.52]

[2m2024-08-01 17:29.09[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801172429: epoch=26 step=26000[0m [36mepoch[0m=[35m26[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006679129600524902, 'time_algorithm_update': 0.009024196863174438, 'loss': 1.533997294306755, 'td_loss': 0.011378505818604025, 'imitator_loss': 1.5226187878847122, 'time_step': 0.009850166082382202}[0m [36mstep[0m=[35m26000[0m



Epoch 27/30: 100%|██████████| 1000/1000 [00:10<00:00, 94.09it/s, loss=1.54, td_loss=0.0141, imitator_loss=1.52]

[2m2024-08-01 17:29.20[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801172429: epoch=27 step=27000[0m [36mepoch[0m=[35m27[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007053656578063965, 'time_algorithm_update': 0.009631531238555908, 'loss': 1.5385546057224273, 'td_loss': 0.014035729828698095, 'imitator_loss': 1.524518876671791, 'time_step': 0.010500485181808471}[0m [36mstep[0m=[35m27000[0m



Epoch 28/30: 100%|██████████| 1000/1000 [00:11<00:00, 84.77it/s, loss=1.54, td_loss=0.0175, imitator_loss=1.52]

[2m2024-08-01 17:29.31[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801172429: epoch=28 step=28000[0m [36mepoch[0m=[35m28[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007618367671966553, 'time_algorithm_update': 0.010729195594787598, 'loss': 1.5412380636930465, 'td_loss': 0.017491436429845634, 'imitator_loss': 1.5237466267347335, 'time_step': 0.011658766031265259}[0m [36mstep[0m=[35m28000[0m



Epoch 29/30: 100%|██████████| 1000/1000 [00:10<00:00, 96.53it/s, loss=1.54, td_loss=0.0179, imitator_loss=1.52]


[2m2024-08-01 17:29.42[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801172429: epoch=29 step=29000[0m [36mepoch[0m=[35m29[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0006821260452270508, 'time_algorithm_update': 0.009394659042358398, 'loss': 1.5409485238790512, 'td_loss': 0.01780220093857497, 'imitator_loss': 1.5231463232040405, 'time_step': 0.010241353511810302}[0m [36mstep[0m=[35m29000[0m


Epoch 30/30: 100%|██████████| 1000/1000 [00:11<00:00, 90.22it/s, loss=1.54, td_loss=0.0164, imitator_loss=1.52]

[2m2024-08-01 17:29.53[0m [[32m[1minfo     [0m] [1mDiscreteBCQ_20240801172429: epoch=30 step=30000[0m [36mepoch[0m=[35m30[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0007209749221801757, 'time_algorithm_update': 0.01005538272857666, 'loss': 1.540486165046692, 'td_loss': 0.0163080475666211, 'imitator_loss': 1.5241781190633774, 'time_step': 0.010948191165924073}[0m [36mstep[0m=[35m30000[0m





True