 # Install/Import Packages and Download/Unzip Dataset

 ---

 The following code installs pinned versions of d3rlpy and gymnasium, imports all necessary dependencies, and downloads and unzips the dataset from Zenodo.

In [None]:
!pip install d3rlpy==2.6.1 gymnasium==0.29.1

Collecting d3rlpy==2.6.1
  Downloading d3rlpy-2.6.1-py3-none-any.whl.metadata (11 kB)
Collecting gymnasium==0.29.1
  Downloading gymnasium-0.29.1-py3-none-any.whl.metadata (10 kB)
Collecting gym>=0.26.0 (from d3rlpy==2.6.1)
  Downloading gym-0.26.2.tar.gz (721 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m721.7/721.7 kB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting structlog (from d3rlpy==2.6.1)
  Downloading structlog-24.4.0-py3-none-any.whl.metadata (7.3 kB)
Collecting colorama (from d3rlpy==2.6.1)
  Downloading colorama-0.4.6-py2.py3-none-any.whl.metadata (17 kB)
Collecting dataclasses-json (from d3rlpy==2.6.1)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting farama-notifications>=0.0.1 (from gymnasium==0.29.1)
  Downloading Farama_Notifications-

In [None]:
from google.colab import files
import numpy as np
import d3rlpy
import os

In [None]:
!wget -O full_dataset.zip "https://zenodo.org/record/13830810/files/full_dataset.zip?download=1"

--2024-10-10 16:37:49--  https://zenodo.org/record/13830810/files/full_dataset.zip?download=1
Resolving zenodo.org (zenodo.org)... 188.184.98.238, 188.185.79.172, 188.184.103.159, ...
Connecting to zenodo.org (zenodo.org)|188.184.98.238|:443... connected.
HTTP request sent, awaiting response... 301 MOVED PERMANENTLY
Location: /records/13830810/files/full_dataset.zip [following]
--2024-10-10 16:37:49--  https://zenodo.org/records/13830810/files/full_dataset.zip
Reusing existing connection to zenodo.org:443.
HTTP request sent, awaiting response... 200 OK
Length: 2161582746 (2.0G) [application/octet-stream]
Saving to: ‘full_dataset.zip’


2024-10-10 16:40:33 (12.6 MB/s) - ‘full_dataset.zip’ saved [2161582746/2161582746]



In [None]:
!unzip full_dataset.zip -d /content/
data_dir = '/content/content/data/data'

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: /content/content/data/data/ep_38406.npz  
  inflating: /content/content/data/data/ep_45886.npz  
  inflating: /content/content/data/data/ep_80720.npz  
  inflating: /content/content/data/data/ep_41811.npz  
  inflating: /content/content/data/data/ep_74361.npz  
  inflating: /content/content/data/data/ep_36142.npz  
  inflating: /content/content/data/data/ep_92647.npz  
  inflating: /content/content/data/data/ep_35769.npz  
  inflating: /content/content/data/data/ep_70452.npz  
  inflating: /content/content/data/data/ep_89001.npz  
  inflating: /content/content/data/data/ep_92324.npz  
  inflating: /content/content/data/data/ep_21612.npz  
  inflating: /content/content/data/data/ep_51788.npz  
  inflating: /content/content/data/data/ep_51083.npz  
  inflating: /content/content/data/data/ep_54442.npz  
  inflating: /content/content/data/data/ep_97628.npz  
  inflating: /content/content/data/data/ep_26720.npz  


# Prepare Dataset for d3rlpy

---

The following code loads every episode file into preallocated NumPy arrays (transposing the image observations to channels-first) and then creates a d3rlpy dataset from them.

In [None]:
# Upper bound on the number of transitions across all episodes. The buffers are
# preallocated to this size and trimmed to the real length after loading.
total_steps = 5000000

# np.empty leaves the buffers uninitialised: every row that is kept gets
# overwritten in the loop below, and the unused tail is sliced off afterwards.
all_observations = np.empty((total_steps, 3, 64, 64), dtype=np.uint8)
all_actions = np.empty((total_steps, 3), dtype=np.float32)
all_rewards = np.empty((total_steps,), dtype=np.float32)
all_terminals = np.empty((total_steps,), dtype=bool)

current_index = 0  # next free row in the preallocated buffers

# Episode files are named ep_1.npz ... ep_99522.npz.
for i in range(1, 99523):
    episode_file = os.path.join(data_dir, f'ep_{i}.npz')

    # The context manager closes the archive's file handle as soon as the
    # episode's arrays have been copied into the buffers.
    with np.load(episode_file) as episode_data:
        # Read once: each key access on an .npz archive re-reads the array.
        terminals = episode_data['terminals']
        num_steps = terminals.shape[0]
        end_index = current_index + num_steps

        # Fail with an explicit message rather than an opaque broadcasting
        # error (or a silently dropped step) when the buffers are too small.
        if end_index > total_steps:
            raise ValueError(
                f'{episode_file} needs rows {current_index}:{end_index}, but the '
                f'buffers only hold total_steps={total_steps}; increase total_steps.'
            )

        # Move the channel axis first so each frame matches the (3, 64, 64)
        # layout of the observation buffer.
        all_observations[current_index:end_index] = np.transpose(episode_data['observations'], (0, 3, 1, 2))
        all_actions[current_index:end_index] = episode_data['actions']
        all_rewards[current_index:end_index] = episode_data['rewards']
        all_terminals[current_index:end_index] = terminals

    current_index = end_index

# Drop the unused tail of each buffer.
all_observations = all_observations[:current_index]
all_actions = all_actions[:current_index]
all_rewards = all_rewards[:current_index]
all_terminals = all_terminals[:current_index]

In [None]:
# Bundle the flat transition arrays into a d3rlpy MDPDataset. d3rlpy determines
# the observation/action/reward signatures and the action space from the
# arrays themselves.
transition_arrays = {
    'observations': all_observations,
    'actions': all_actions,
    'rewards': all_rewards,
    'terminals': all_terminals,
}
dataset = d3rlpy.dataset.MDPDataset(**transition_arrays)

[2m2024-10-10 16:56.08[0m [[32m[1minfo     [0m] [1mSignatures have been automatically determined.[0m [36maction_signature[0m=[35mSignature(dtype=[dtype('float32')], shape=[(3,)])[0m [36mobservation_signature[0m=[35mSignature(dtype=[dtype('uint8')], shape=[(3, 64, 64)])[0m [36mreward_signature[0m=[35mSignature(dtype=[dtype('float32')], shape=[(1,)])[0m
[2m2024-10-10 16:56.08[0m [[32m[1minfo     [0m] [1mAction-space has been automatically determined.[0m [36maction_space[0m=[35m<ActionSpace.CONTINUOUS: 1>[0m
[2m2024-10-10 16:56.08[0m [[32m[1minfo     [0m] [1mAction size has been automatically determined.[0m [36maction_size[0m=[35m3[0m


# Model Training

---

The following code trains a TD3+BC (TD3 with behavior cloning) agent for 50 epochs of 10,000 training steps each (500,000 steps in total).

In [None]:
# Training schedule: 50 epochs of 10,000 update steps each.
n_epochs = 50
n_steps_per_epoch = 10000

# TD3+BC configured with a pixel observation scaler for the uint8 image
# observations; the algorithm is created on the GPU.
td3_config = d3rlpy.algos.TD3PlusBCConfig(
    observation_scaler=d3rlpy.preprocessing.PixelObservationScaler(),
)
TD3 = td3_config.create(device='cuda')

# fit() logs metrics and saves a model checkpoint under d3rlpy_logs/ after
# every epoch.
TD3.fit(
    dataset=dataset,
    n_steps=n_epochs * n_steps_per_epoch,
    n_steps_per_epoch=n_steps_per_epoch,
)

[2m2024-10-10 16:56.13[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('uint8')], shape=[(3, 64, 64)]), action_signature=Signature(dtype=[dtype('float32')], shape=[(3,)]), reward_signature=Signature(dtype=[dtype('float32')], shape=[(1,)]), action_space=<ActionSpace.CONTINUOUS: 1>, action_size=3)[0m
[2m2024-10-10 16:56.13[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/TD3PlusBC_20241010165613[0m
[2m2024-10-10 16:56.13[0m [[32m[1mdebug    [0m] [1mBuilding models...            [0m
[2m2024-10-10 16:56.15[0m [[32m[1mdebug    [0m] [1mModels have been built.       [0m
[2m2024-10-10 16:56.15[0m [[32m[1minfo     [0m] [1mParameters                    [0m [36mparams[0m=[35m{'observation_shape': [3, 64, 64], 'action_size': 3, 'config': {'type': 'td3_plus_bc', 'params': {'batch_size': 256, 'gamma': 0.99, 'observation_scaler': {'type': 'pixel', 'pa

Epoch 1/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 17:00.23[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=1 step=10000[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.008369894599914551, 'time_algorithm_update': 0.016096764087677, 'critic_loss': 93.82236885137559, 'actor_loss': -2.4616885204315184, 'bc_loss': 0.03657058561388403, 'time_step': 0.024614270520210268}[0m [36mstep[0m=[35m10000[0m
[2m2024-10-10 17:00.23[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_10000.d3[0m


Epoch 2/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 17:04.26[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=2 step=20000[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.008223797225952149, 'time_algorithm_update': 0.015871265625953673, 'critic_loss': 11.852331443214416, 'actor_loss': -2.4799877138614654, 'bc_loss': 0.01696816382277757, 'time_step': 0.024231420373916627}[0m [36mstep[0m=[35m20000[0m
[2m2024-10-10 17:04.26[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_20000.d3[0m


Epoch 3/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 17:08.31[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=3 step=30000[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00825217661857605, 'time_algorithm_update': 0.015892396545410158, 'critic_loss': 4.553166068780422, 'actor_loss': -2.484114922428131, 'bc_loss': 0.015283993274532259, 'time_step': 0.024285818433761597}[0m [36mstep[0m=[35m30000[0m
[2m2024-10-10 17:08.31[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_30000.d3[0m


Epoch 4/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 17:12.35[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=4 step=40000[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.008270167994499207, 'time_algorithm_update': 0.015890220284461975, 'critic_loss': 2.2316873225986957, 'actor_loss': -2.4878032606124876, 'bc_loss': 0.012165561911650002, 'time_step': 0.02429697051048279}[0m [36mstep[0m=[35m40000[0m
[2m2024-10-10 17:12.35[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_40000.d3[0m


Epoch 5/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 17:16.40[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=5 step=50000[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.008272950863838197, 'time_algorithm_update': 0.015895522022247314, 'critic_loss': 1.5025633713662625, 'actor_loss': -2.4890024594306945, 'bc_loss': 0.010997539815306663, 'time_step': 0.024305692982673644}[0m [36mstep[0m=[35m50000[0m
[2m2024-10-10 17:16.40[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_50000.d3[0m


Epoch 6/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 17:20.52[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=6 step=60000[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.008643707919120789, 'time_algorithm_update': 0.016244717979431153, 'critic_loss': 1.2430278807878494, 'actor_loss': -2.489525604724884, 'bc_loss': 0.010474396260641516, 'time_step': 0.025029592990875243}[0m [36mstep[0m=[35m60000[0m
[2m2024-10-10 17:20.52[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_60000.d3[0m


Epoch 7/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 17:25.05[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=7 step=70000[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.008706842398643494, 'time_algorithm_update': 0.016299847626686095, 'critic_loss': 1.1234559755802154, 'actor_loss': -2.4898446751594543, 'bc_loss': 0.010155321916937828, 'time_step': 0.025147555446624757}[0m [36mstep[0m=[35m70000[0m
[2m2024-10-10 17:25.05[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_70000.d3[0m


Epoch 8/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 17:29.17[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=8 step=80000[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.008676603460311889, 'time_algorithm_update': 0.016233943581581117, 'critic_loss': 1.053903609919548, 'actor_loss': -2.49008480963707, 'bc_loss': 0.009915190615598111, 'time_step': 0.025053392362594604}[0m [36mstep[0m=[35m80000[0m
[2m2024-10-10 17:29.17[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_80000.d3[0m


Epoch 9/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 17:33.24[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=9 step=90000[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00839881432056427, 'time_algorithm_update': 0.016009279084205626, 'critic_loss': 0.9851951924920082, 'actor_loss': -2.4902837539196017, 'bc_loss': 0.00971624533496797, 'time_step': 0.024550818634033203}[0m [36mstep[0m=[35m90000[0m
[2m2024-10-10 17:33.24[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_90000.d3[0m


Epoch 10/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 17:37.36[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=10 step=100000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.008678897595405579, 'time_algorithm_update': 0.016236401319503786, 'critic_loss': 0.9467783392131328, 'actor_loss': -2.4904451692581175, 'bc_loss': 0.009554830165859313, 'time_step': 0.02506427137851715}[0m [36mstep[0m=[35m100000[0m
[2m2024-10-10 17:37.37[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_100000.d3[0m


Epoch 11/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 17:41.51[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=11 step=110000[0m [36mepoch[0m=[35m11[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.008777221584320068, 'time_algorithm_update': 0.016336337685585022, 'critic_loss': 0.9227805882811546, 'actor_loss': -2.490550042486191, 'bc_loss': 0.009449957593530416, 'time_step': 0.025263777017593383}[0m [36mstep[0m=[35m110000[0m
[2m2024-10-10 17:41.51[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_110000.d3[0m


Epoch 12/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 17:46.05[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=12 step=120000[0m [36mepoch[0m=[35m12[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.008763574361801148, 'time_algorithm_update': 0.01632258834838867, 'critic_loss': 0.8866549141407013, 'actor_loss': -2.490674619483948, 'bc_loss': 0.009325381684489549, 'time_step': 0.025237699222564698}[0m [36mstep[0m=[35m120000[0m
[2m2024-10-10 17:46.05[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_120000.d3[0m


Epoch 13/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 17:50.16[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=13 step=130000[0m [36mepoch[0m=[35m13[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.008589017748832703, 'time_algorithm_update': 0.016168214106559754, 'critic_loss': 0.8710223221182823, 'actor_loss': -2.4907731862545015, 'bc_loss': 0.009226810572016984, 'time_step': 0.02490582983493805}[0m [36mstep[0m=[35m130000[0m
[2m2024-10-10 17:50.16[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_130000.d3[0m


Epoch 14/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 17:54.32[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=14 step=140000[0m [36mepoch[0m=[35m14[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00887356629371643, 'time_algorithm_update': 0.016431119060516357, 'critic_loss': 0.8507992959469557, 'actor_loss': -2.490844565820694, 'bc_loss': 0.009155436604842544, 'time_step': 0.02545665099620819}[0m [36mstep[0m=[35m140000[0m
[2m2024-10-10 17:54.32[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_140000.d3[0m


Epoch 15/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 17:58.45[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=15 step=150000[0m [36mepoch[0m=[35m15[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.008725220870971679, 'time_algorithm_update': 0.016322740316390992, 'critic_loss': 0.8371404936403036, 'actor_loss': -2.4909040318489075, 'bc_loss': 0.00909597047045827, 'time_step': 0.025198520755767822}[0m [36mstep[0m=[35m150000[0m
[2m2024-10-10 17:58.46[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_150000.d3[0m


Epoch 16/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 18:02.55[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=16 step=160000[0m [36mepoch[0m=[35m16[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.008515217995643616, 'time_algorithm_update': 0.016119002413749695, 'critic_loss': 0.8193891100019216, 'actor_loss': -2.4910269939422607, 'bc_loss': 0.0089730059848167, 'time_step': 0.024783273363113402}[0m [36mstep[0m=[35m160000[0m
[2m2024-10-10 18:02.55[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_160000.d3[0m


Epoch 17/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 18:07.01[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=17 step=170000[0m [36mepoch[0m=[35m17[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00832605767250061, 'time_algorithm_update': 0.015940575170516967, 'critic_loss': 0.8158209930241108, 'actor_loss': -2.4910584213256834, 'bc_loss': 0.008941576481796802, 'time_step': 0.02441404185295105}[0m [36mstep[0m=[35m170000[0m
[2m2024-10-10 18:07.01[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_170000.d3[0m


Epoch 18/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 18:11.06[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=18 step=180000[0m [36mepoch[0m=[35m18[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.008319599485397339, 'time_algorithm_update': 0.015950176024436952, 'critic_loss': 0.8033280640244483, 'actor_loss': -2.491123612308502, 'bc_loss': 0.008876387426629663, 'time_step': 0.02441481702327728}[0m [36mstep[0m=[35m180000[0m
[2m2024-10-10 18:11.06[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_180000.d3[0m


Epoch 19/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 18:15.12[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=19 step=190000[0m [36mepoch[0m=[35m19[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.008327826523780823, 'time_algorithm_update': 0.015936469745635985, 'critic_loss': 0.7989808128774166, 'actor_loss': -2.491167933177948, 'bc_loss': 0.008832065939344465, 'time_step': 0.024411268854141237}[0m [36mstep[0m=[35m190000[0m
[2m2024-10-10 18:15.12[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_190000.d3[0m


Epoch 20/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 18:19.17[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=20 step=200000[0m [36mepoch[0m=[35m20[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.008285526823997498, 'time_algorithm_update': 0.015888667368888856, 'critic_loss': 0.7786421937972308, 'actor_loss': -2.491217666053772, 'bc_loss': 0.00878232999863103, 'time_step': 0.024321374583244324}[0m [36mstep[0m=[35m200000[0m
[2m2024-10-10 18:19.17[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_200000.d3[0m


Epoch 21/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 18:23.22[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=21 step=210000[0m [36mepoch[0m=[35m21[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00831637032032013, 'time_algorithm_update': 0.015929314851760863, 'critic_loss': 0.7713563157200813, 'actor_loss': -2.4913064734458925, 'bc_loss': 0.008693527574650943, 'time_step': 0.02439339678287506}[0m [36mstep[0m=[35m210000[0m
[2m2024-10-10 18:23.22[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_210000.d3[0m


Epoch 22/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 18:27.27[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=22 step=220000[0m [36mepoch[0m=[35m22[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.008300255179405212, 'time_algorithm_update': 0.015908022499084472, 'critic_loss': 0.7665328591227532, 'actor_loss': -2.4913115739822387, 'bc_loss': 0.00868842623718083, 'time_step': 0.024357079315185548}[0m [36mstep[0m=[35m220000[0m
[2m2024-10-10 18:27.27[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_220000.d3[0m


Epoch 23/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 18:31.32[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=23 step=230000[0m [36mepoch[0m=[35m23[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.008308545804023742, 'time_algorithm_update': 0.015921415615081785, 'critic_loss': 0.7571177019506693, 'actor_loss': -2.4913679856300353, 'bc_loss': 0.008632011506333947, 'time_step': 0.024378301572799684}[0m [36mstep[0m=[35m230000[0m
[2m2024-10-10 18:31.32[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_230000.d3[0m


Epoch 24/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 18:35.38[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=24 step=240000[0m [36mepoch[0m=[35m24[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00831756100654602, 'time_algorithm_update': 0.015927716159820558, 'critic_loss': 0.7564041999787092, 'actor_loss': -2.491388548183441, 'bc_loss': 0.008611453701462596, 'time_step': 0.02439326825141907}[0m [36mstep[0m=[35m240000[0m
[2m2024-10-10 18:35.38[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_240000.d3[0m


Epoch 25/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 18:39.43[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=25 step=250000[0m [36mepoch[0m=[35m25[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00829435772895813, 'time_algorithm_update': 0.015926110100746155, 'critic_loss': 0.7468443058371544, 'actor_loss': -2.4914287827968598, 'bc_loss': 0.00857121749734506, 'time_step': 0.024368651580810547}[0m [36mstep[0m=[35m250000[0m
[2m2024-10-10 18:39.43[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_250000.d3[0m


Epoch 26/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 18:43.51[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=26 step=260000[0m [36mepoch[0m=[35m26[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.008463542199134826, 'time_algorithm_update': 0.016040042614936828, 'critic_loss': 0.7390779946565628, 'actor_loss': -2.491497055339813, 'bc_loss': 0.008502943467814475, 'time_step': 0.02465445213317871}[0m [36mstep[0m=[35m260000[0m
[2m2024-10-10 18:43.51[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_260000.d3[0m


Epoch 27/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 18:47.56[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=27 step=270000[0m [36mepoch[0m=[35m27[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.008314588618278504, 'time_algorithm_update': 0.01590666890144348, 'critic_loss': 0.7339670538514853, 'actor_loss': -2.491547439289093, 'bc_loss': 0.008452561502810568, 'time_step': 0.024370671820640562}[0m [36mstep[0m=[35m270000[0m
[2m2024-10-10 18:47.56[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_270000.d3[0m


Epoch 28/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 18:52.02[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=28 step=280000[0m [36mepoch[0m=[35m28[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.008318086457252502, 'time_algorithm_update': 0.015944428873062133, 'critic_loss': 0.7291089138120412, 'actor_loss': -2.4915782790184022, 'bc_loss': 0.008421719374321401, 'time_step': 0.024411959505081175}[0m [36mstep[0m=[35m280000[0m
[2m2024-10-10 18:52.02[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_280000.d3[0m


Epoch 29/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 18:56.07[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=29 step=290000[0m [36mepoch[0m=[35m29[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.008318595027923583, 'time_algorithm_update': 0.01591383261680603, 'critic_loss': 0.7270001979917288, 'actor_loss': -2.491584192371368, 'bc_loss': 0.008415809055417776, 'time_step': 0.02438235363960266}[0m [36mstep[0m=[35m290000[0m
[2m2024-10-10 18:56.07[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_290000.d3[0m


Epoch 30/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 19:00.15[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=30 step=300000[0m [36mepoch[0m=[35m30[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.008450653767585754, 'time_algorithm_update': 0.016035593676567077, 'critic_loss': 0.7281509418487548, 'actor_loss': -2.4915890518665313, 'bc_loss': 0.008410949482209981, 'time_step': 0.024638506698608398}[0m [36mstep[0m=[35m300000[0m
[2m2024-10-10 19:00.15[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_300000.d3[0m


Epoch 31/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 19:04.21[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=31 step=310000[0m [36mepoch[0m=[35m31[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00835483193397522, 'time_algorithm_update': 0.0159528879404068, 'critic_loss': 0.7210866881877184, 'actor_loss': -2.49165072183609, 'bc_loss': 0.008349281927384437, 'time_step': 0.02445734634399414}[0m [36mstep[0m=[35m310000[0m
[2m2024-10-10 19:04.21[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_310000.d3[0m


Epoch 32/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 19:08.29[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=32 step=320000[0m [36mepoch[0m=[35m32[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.008415953159332275, 'time_algorithm_update': 0.016034626030921935, 'critic_loss': 0.7188543949991465, 'actor_loss': -2.4916436215400695, 'bc_loss': 0.008356376015115529, 'time_step': 0.02460224106311798}[0m [36mstep[0m=[35m320000[0m
[2m2024-10-10 19:08.29[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_320000.d3[0m


Epoch 33/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 19:12.36[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=33 step=330000[0m [36mepoch[0m=[35m33[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.008389064264297485, 'time_algorithm_update': 0.015983694434165953, 'critic_loss': 0.7159581362098455, 'actor_loss': -2.4916899742603302, 'bc_loss': 0.008310024374630302, 'time_step': 0.02452444977760315}[0m [36mstep[0m=[35m330000[0m
[2m2024-10-10 19:12.36[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_330000.d3[0m


Epoch 34/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 19:16.44[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=34 step=340000[0m [36mepoch[0m=[35m34[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00846349847316742, 'time_algorithm_update': 0.01607961423397064, 'critic_loss': 0.7112474976748228, 'actor_loss': -2.4916893817901613, 'bc_loss': 0.008310615371353924, 'time_step': 0.024695219802856445}[0m [36mstep[0m=[35m340000[0m
[2m2024-10-10 19:16.44[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_340000.d3[0m


Epoch 35/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 19:20.53[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=35 step=350000[0m [36mepoch[0m=[35m35[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.008511866402626038, 'time_algorithm_update': 0.01610186882019043, 'critic_loss': 0.7054205229461193, 'actor_loss': -2.49173648481369, 'bc_loss': 0.008263514284882695, 'time_step': 0.024766845464706422}[0m [36mstep[0m=[35m350000[0m
[2m2024-10-10 19:20.54[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_350000.d3[0m


Epoch 36/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 19:25.00[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=36 step=360000[0m [36mepoch[0m=[35m36[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.008383537673950196, 'time_algorithm_update': 0.0159504008769989, 'critic_loss': 0.7056578181445599, 'actor_loss': -2.4917683920383453, 'bc_loss': 0.008231605083774775, 'time_step': 0.02448527271747589}[0m [36mstep[0m=[35m360000[0m
[2m2024-10-10 19:25.00[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_360000.d3[0m


Epoch 37/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 19:29.05[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=37 step=370000[0m [36mepoch[0m=[35m37[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00831787657737732, 'time_algorithm_update': 0.015939566564559936, 'critic_loss': 0.6932222690820694, 'actor_loss': -2.491794641542435, 'bc_loss': 0.008205358919594436, 'time_step': 0.02440950231552124}[0m [36mstep[0m=[35m370000[0m
[2m2024-10-10 19:29.06[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_370000.d3[0m


Epoch 38/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 19:33.12[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=38 step=380000[0m [36mepoch[0m=[35m38[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00833928451538086, 'time_algorithm_update': 0.01596241157054901, 'critic_loss': 0.6943919555902481, 'actor_loss': -2.491808060789108, 'bc_loss': 0.008191939567029476, 'time_step': 0.02445426881313324}[0m [36mstep[0m=[35m380000[0m
[2m2024-10-10 19:33.12[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_380000.d3[0m


Epoch 39/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 19:37.17[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=39 step=390000[0m [36mepoch[0m=[35m39[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00834537079334259, 'time_algorithm_update': 0.015931197690963746, 'critic_loss': 0.6929649500876666, 'actor_loss': -2.4918397902965546, 'bc_loss': 0.008160209479834885, 'time_step': 0.02442955617904663}[0m [36mstep[0m=[35m390000[0m
[2m2024-10-10 19:37.17[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_390000.d3[0m


Epoch 40/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 19:41.22[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=40 step=400000[0m [36mepoch[0m=[35m40[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.008304335856437682, 'time_algorithm_update': 0.015879396629333496, 'critic_loss': 0.6876953067570925, 'actor_loss': -2.4918643624782564, 'bc_loss': 0.008135637897998095, 'time_step': 0.02433571937084198}[0m [36mstep[0m=[35m400000[0m
[2m2024-10-10 19:41.22[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_400000.d3[0m


Epoch 41/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 19:45.28[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=41 step=410000[0m [36mepoch[0m=[35m41[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.008313293647766113, 'time_algorithm_update': 0.015918340587615966, 'critic_loss': 0.6863886120587588, 'actor_loss': -2.491875281429291, 'bc_loss': 0.008124719417840242, 'time_step': 0.024384809374809264}[0m [36mstep[0m=[35m410000[0m
[2m2024-10-10 19:45.28[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_410000.d3[0m


Epoch 42/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 19:49.32[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=42 step=420000[0m [36mepoch[0m=[35m42[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.008315020155906678, 'time_algorithm_update': 0.015860623741149903, 'critic_loss': 0.6802896941334009, 'actor_loss': -2.491886935329437, 'bc_loss': 0.00811306656403467, 'time_step': 0.02432782506942749}[0m [36mstep[0m=[35m420000[0m
[2m2024-10-10 19:49.32[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_420000.d3[0m


Epoch 43/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 19:53.37[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=43 step=430000[0m [36mepoch[0m=[35m43[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.008282133555412293, 'time_algorithm_update': 0.015856647658348082, 'critic_loss': 0.6813952497154474, 'actor_loss': -2.4918975630283358, 'bc_loss': 0.008102436949871481, 'time_step': 0.024291456007957457}[0m [36mstep[0m=[35m430000[0m
[2m2024-10-10 19:53.37[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_430000.d3[0m


Epoch 44/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 19:57.41[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=44 step=440000[0m [36mepoch[0m=[35m44[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.008263855290412903, 'time_algorithm_update': 0.01584268045425415, 'critic_loss': 0.6779339733213187, 'actor_loss': -2.4919136181354524, 'bc_loss': 0.008086380574200303, 'time_step': 0.024258054900169374}[0m [36mstep[0m=[35m440000[0m
[2m2024-10-10 19:57.41[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_440000.d3[0m


Epoch 45/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 20:01.52[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=45 step=450000[0m [36mepoch[0m=[35m45[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.008603055810928345, 'time_algorithm_update': 0.016184800839424134, 'critic_loss': 0.6773248409807682, 'actor_loss': -2.491942429971695, 'bc_loss': 0.008057570416107773, 'time_step': 0.02494204785823822}[0m [36mstep[0m=[35m450000[0m
[2m2024-10-10 20:01.52[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_450000.d3[0m


Epoch 46/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 20:05.58[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=46 step=460000[0m [36mepoch[0m=[35m46[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00836331741809845, 'time_algorithm_update': 0.0159411559343338, 'critic_loss': 0.678073507219553, 'actor_loss': -2.491938128566742, 'bc_loss': 0.008061872575711459, 'time_step': 0.024457329797744752}[0m [36mstep[0m=[35m460000[0m
[2m2024-10-10 20:05.58[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_460000.d3[0m


Epoch 47/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 20:10.02[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=47 step=470000[0m [36mepoch[0m=[35m47[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.008271672749519348, 'time_algorithm_update': 0.015854731130599976, 'critic_loss': 0.6705604202032089, 'actor_loss': -2.4920024238586427, 'bc_loss': 0.007997575374506414, 'time_step': 0.02427917494773865}[0m [36mstep[0m=[35m470000[0m
[2m2024-10-10 20:10.02[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_470000.d3[0m


Epoch 48/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 20:14.07[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=48 step=480000[0m [36mepoch[0m=[35m48[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.008306891131401062, 'time_algorithm_update': 0.015878179359436036, 'critic_loss': 0.6668334705114365, 'actor_loss': -2.492000475358963, 'bc_loss': 0.007999522688239813, 'time_step': 0.024337128472328187}[0m [36mstep[0m=[35m480000[0m
[2m2024-10-10 20:14.07[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_480000.d3[0m


Epoch 49/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 20:18.16[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=49 step=490000[0m [36mepoch[0m=[35m49[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.008497915077209473, 'time_algorithm_update': 0.01606016035079956, 'critic_loss': 0.6690678251951933, 'actor_loss': -2.492020642662048, 'bc_loss': 0.007979360209126025, 'time_step': 0.024712475061416626}[0m [36mstep[0m=[35m490000[0m
[2m2024-10-10 20:18.16[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_490000.d3[0m


Epoch 50/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-10-10 20:22.21[0m [[32m[1minfo     [0m] [1mTD3PlusBC_20241010165613: epoch=50 step=500000[0m [36mepoch[0m=[35m50[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.008335050678253174, 'time_algorithm_update': 0.015917618370056154, 'critic_loss': 0.6676474977880716, 'actor_loss': -2.4920443999767303, 'bc_loss': 0.007955600764788687, 'time_step': 0.02440484297275543}[0m [36mstep[0m=[35m500000[0m
[2m2024-10-10 20:22.21[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/TD3PlusBC_20241010165613/model_500000.d3[0m


[(1,
  {'time_sample_batch': 0.008369894599914551,
   'time_algorithm_update': 0.016096764087677,
   'critic_loss': 93.82236885137559,
   'actor_loss': -2.4616885204315184,
   'bc_loss': 0.03657058561388403,
   'time_step': 0.024614270520210268}),
 (2,
  {'time_sample_batch': 0.008223797225952149,
   'time_algorithm_update': 0.015871265625953673,
   'critic_loss': 11.852331443214416,
   'actor_loss': -2.4799877138614654,
   'bc_loss': 0.01696816382277757,
   'time_step': 0.024231420373916627}),
 (3,
  {'time_sample_batch': 0.00825217661857605,
   'time_algorithm_update': 0.015892396545410158,
   'critic_loss': 4.553166068780422,
   'actor_loss': -2.484114922428131,
   'bc_loss': 0.015283993274532259,
   'time_step': 0.024285818433761597}),
 (4,
  {'time_sample_batch': 0.008270167994499207,
   'time_algorithm_update': 0.015890220284461975,
   'critic_loss': 2.2316873225986957,
   'actor_loss': -2.4878032606124876,
   'bc_loss': 0.012165561911650002,
   'time_step': 0.02429697051048279})

In [None]:
# Bundle the whole d3rlpy_logs directory (recursively, via -r) into
# td3_model_full_dataset.zip. Per the training output above, this directory
# holds the TD3PlusBC_20241010165613 run: a model_<step>.d3 checkpoint saved
# after each epoch, plus params.json and the per-metric CSV files.
# NOTE(review): presumably archived so the run can be pulled off the Colab VM
# as a single file -- confirm against the following cell.
!zip -r td3_model_full_dataset.zip d3rlpy_logs

  adding: d3rlpy_logs/ (stored 0%)
  adding: d3rlpy_logs/TD3PlusBC_20241010165613/ (stored 0%)
  adding: d3rlpy_logs/TD3PlusBC_20241010165613/model_460000.d3 (deflated 23%)
  adding: d3rlpy_logs/TD3PlusBC_20241010165613/model_220000.d3 (deflated 23%)
  adding: d3rlpy_logs/TD3PlusBC_20241010165613/model_410000.d3 (deflated 23%)
  adding: d3rlpy_logs/TD3PlusBC_20241010165613/model_280000.d3 (deflated 23%)
  adding: d3rlpy_logs/TD3PlusBC_20241010165613/model_180000.d3 (deflated 22%)
  adding: d3rlpy_logs/TD3PlusBC_20241010165613/model_40000.d3 (deflated 15%)
  adding: d3rlpy_logs/TD3PlusBC_20241010165613/model_50000.d3 (deflated 15%)
  adding: d3rlpy_logs/TD3PlusBC_20241010165613/model_190000.d3 (deflated 22%)
  adding: d3rlpy_logs/TD3PlusBC_20241010165613/params.json (deflated 76%)
  adding: d3rlpy_logs/TD3PlusBC_20241010165613/time_step.csv (deflated 59%)
  adding: d3rlpy_logs/TD3PlusBC_20241010165613/time_sample_batch.csv (deflated 60%)
  adding: d3rlpy_logs/TD3PlusBC_20241010165613/mo