 # Install/Import Packages and Download/Unzip Dataset

 ---

 The following code installs pinned versions of d3rlpy and gymnasium, imports all necessary dependencies, and downloads and unzips the dataset from Zenodo.

In [None]:
!pip install d3rlpy==2.6.1 gymnasium==0.29.1

Collecting d3rlpy
  Downloading d3rlpy-2.6.1-py3-none-any.whl.metadata (11 kB)
Collecting gym>=0.26.0 (from d3rlpy)
  Downloading gym-0.26.2.tar.gz (721 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m721.7/721.7 kB[0m [31m34.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting structlog (from d3rlpy)
  Downloading structlog-24.4.0-py3-none-any.whl.metadata (7.3 kB)
Collecting colorama (from d3rlpy)
  Downloading colorama-0.4.6-py2.py3-none-any.whl.metadata (17 kB)
Collecting dataclasses-json (from d3rlpy)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting gymnasium (from d3rlpy)
  Downloading gymnasium-0.29.1-py3-none-any.whl.metadata (10 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json->d3rlpy)
  Downloading marshmallow-3.22.0-py3-none-any.whl.metadata

In [None]:
from google.colab import files
import numpy as np
import d3rlpy
import os

In [None]:
!wget -O full_dataset.zip "https://zenodo.org/record/13830810/files/full_dataset.zip?download=1"

--2024-09-25 21:58:56--  https://zenodo.org/record/13830810/files/full_dataset.zip?download=1
Resolving zenodo.org (zenodo.org)... 188.185.79.172, 188.184.98.238, 188.184.103.159, ...
Connecting to zenodo.org (zenodo.org)|188.185.79.172|:443... connected.
HTTP request sent, awaiting response... 301 MOVED PERMANENTLY
Location: /records/13830810/files/full_dataset.zip [following]
--2024-09-25 21:58:57--  https://zenodo.org/records/13830810/files/full_dataset.zip
Reusing existing connection to zenodo.org:443.
HTTP request sent, awaiting response... 200 OK
Length: 2161582746 (2.0G) [application/octet-stream]
Saving to: ‘full_dataset.zip’


2024-09-25 22:01:05 (16.1 MB/s) - ‘full_dataset.zip’ saved [2161582746/2161582746]



In [None]:
!unzip full_dataset.zip -d /content/
data_dir = '/content/content/data/data'

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: /content/content/data/data/ep_38406.npz  
  inflating: /content/content/data/data/ep_45886.npz  
  inflating: /content/content/data/data/ep_80720.npz  
  inflating: /content/content/data/data/ep_41811.npz  
  inflating: /content/content/data/data/ep_74361.npz  
  inflating: /content/content/data/data/ep_36142.npz  
  inflating: /content/content/data/data/ep_92647.npz  
  inflating: /content/content/data/data/ep_35769.npz  
  inflating: /content/content/data/data/ep_70452.npz  
  inflating: /content/content/data/data/ep_89001.npz  
  inflating: /content/content/data/data/ep_92324.npz  
  inflating: /content/content/data/data/ep_21612.npz  
  inflating: /content/content/data/data/ep_51788.npz  
  inflating: /content/content/data/data/ep_51083.npz  
  inflating: /content/content/data/data/ep_54442.npz  
  inflating: /content/content/data/data/ep_97628.npz  
  inflating: /content/content/data/data/ep_26720.npz  


# Prepare Dataset for d3rlpy

---

 The following code loads episodes 1–5000 (files `ep_1.npz` through `ep_5000.npz`) into preallocated arrays and creates a d3rlpy dataset from them.

In [None]:
# Load a contiguous range of episode files into flat, preallocated arrays.
#
# Every ep_<i>.npz archive is expected to provide the keys 'observations',
# 'actions', 'rewards' and 'terminals'. Episodes are written back to back into
# the buffers; `current_index` tracks how many steps have been filled so far.
first_episode = 1
last_episode = 5000

# Capacity of the preallocated buffers, in steps (an upper bound, not the
# number of steps actually loaded; the buffers are trimmed after the loop).
total_steps = 5000000

all_observations = np.empty((total_steps, 3, 64, 64), dtype=np.uint8)
all_actions = np.empty((total_steps, 3), dtype=np.float32)
all_rewards = np.empty((total_steps,), dtype=np.float32)
all_terminals = np.empty((total_steps,), dtype=bool)

current_index = 0

for i in range(first_episode, last_episode + 1):
    episode_file = os.path.join(data_dir, f'ep_{i}.npz')

    # An .npz archive keeps its file open until it is closed; the context
    # manager releases the handle as soon as the arrays have been copied out.
    with np.load(episode_file) as episode_data:
        num_steps = episode_data['terminals'].shape[0]
        end_index = current_index + num_steps

        # Fail with a clear message instead of a shape-mismatch error from
        # assigning into a truncated slice.
        if end_index > total_steps:
            raise ValueError(
                f'{episode_file} needs steps up to index {end_index}, '
                f'but the buffers only hold total_steps={total_steps}'
            )

        # Move axis 3 to position 1 to match the (3, 64, 64) buffer layout
        # (presumably height-width-channel on disk -> channel-first; verify
        # against the dataset description).
        all_observations[current_index:end_index] = np.transpose(episode_data['observations'], (0, 3, 1, 2))
        all_actions[current_index:end_index] = episode_data['actions']
        all_rewards[current_index:end_index] = episode_data['rewards']
        all_terminals[current_index:end_index] = episode_data['terminals']

    current_index = end_index

# Drop the unused tail of each buffer. These slices are views, so the
# underlying full-size buffers stay referenced.
all_observations = all_observations[:current_index]
all_actions = all_actions[:current_index]
all_rewards = all_rewards[:current_index]
all_terminals = all_terminals[:current_index]

In [None]:
# Wrap the flat step arrays in a d3rlpy MDPDataset. Arguments are passed
# positionally in the constructor's order:
# observations, actions, rewards, terminals.
dataset = d3rlpy.dataset.MDPDataset(
    all_observations,  # observations
    all_actions,       # actions
    all_rewards,       # rewards
    all_terminals,     # terminals
)

[2m2024-09-25 22:06.48[0m [[32m[1minfo     [0m] [1mSignatures have been automatically determined.[0m [36maction_signature[0m=[35mSignature(dtype=[dtype('float32')], shape=[(3,)])[0m [36mobservation_signature[0m=[35mSignature(dtype=[dtype('uint8')], shape=[(3, 64, 64)])[0m [36mreward_signature[0m=[35mSignature(dtype=[dtype('float32')], shape=[(1,)])[0m
[2m2024-09-25 22:06.48[0m [[32m[1minfo     [0m] [1mAction-space has been automatically determined.[0m [36maction_space[0m=[35m<ActionSpace.CONTINUOUS: 1>[0m
[2m2024-09-25 22:06.48[0m [[32m[1minfo     [0m] [1mAction size has been automatically determined.[0m [36maction_size[0m=[35m3[0m


# Model Training

---

The following code trains a behavior cloning algorithm for 50 epochs at 10000 steps per epoch.

In [None]:
# Train a behavior-cloning (BC) policy on the image dataset.
SEED = 0                   # fixed seed so repeated runs are comparable
N_EPOCHS = 50
N_STEPS_PER_EPOCH = 10000

# Seed before the model is built so weight initialisation is covered too.
d3rlpy.seed(SEED)

# The pixel observation scaler preprocesses the uint8 image observations
# before they reach the network (see the d3rlpy preprocessing docs).
BC = d3rlpy.algos.BCConfig(
    observation_scaler=d3rlpy.preprocessing.PixelObservationScaler(),
).create(device='cuda')

# fit() returns a list of (epoch, metrics) tuples. Binding it to a name keeps
# the history available for later inspection and stops the notebook from
# echoing the whole list as the cell's output.
bc_training_history = BC.fit(
    dataset=dataset,
    n_steps=N_EPOCHS * N_STEPS_PER_EPOCH,
    n_steps_per_epoch=N_STEPS_PER_EPOCH,
)

[2m2024-09-25 22:06.48[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('uint8')], shape=[(3, 64, 64)]), action_signature=Signature(dtype=[dtype('float32')], shape=[(3,)]), reward_signature=Signature(dtype=[dtype('float32')], shape=[(1,)]), action_space=<ActionSpace.CONTINUOUS: 1>, action_size=3)[0m
[2m2024-09-25 22:06.48[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/BC_20240925220648[0m
[2m2024-09-25 22:06.48[0m [[32m[1mdebug    [0m] [1mBuilding models...            [0m
[2m2024-09-25 22:06.50[0m [[32m[1mdebug    [0m] [1mModels have been built.       [0m
[2m2024-09-25 22:06.50[0m [[32m[1minfo     [0m] [1mParameters                    [0m [36mparams[0m=[35m{'observation_shape': [3, 64, 64], 'action_size': 3, 'config': {'type': 'bc', 'params': {'batch_size': 100, 'gamma': 0.99, 'observation_scaler': {'type': 'pixel', 'params': {}}, 'act

Epoch 1/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:08.18[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=1 step=10000[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0031897283554077147, 'time_algorithm_update': 0.0054560060501098635, 'loss': 0.013368270827364177, 'time_step': 0.00874570083618164}[0m [36mstep[0m=[35m10000[0m
[2m2024-09-25 22:08.18[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_10000.d3[0m


Epoch 2/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:09.45[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=2 step=20000[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0032048604726791384, 'time_algorithm_update': 0.00530379319190979, 'loss': 0.004694549460848793, 'time_step': 0.008605687618255616}[0m [36mstep[0m=[35m20000[0m
[2m2024-09-25 22:09.45[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_20000.d3[0m


Epoch 3/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:11.12[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=3 step=30000[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003196194291114807, 'time_algorithm_update': 0.005288543009757995, 'loss': 0.0031358499483088963, 'time_step': 0.008579443001747131}[0m [36mstep[0m=[35m30000[0m
[2m2024-09-25 22:11.12[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_30000.d3[0m


Epoch 4/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:12.39[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=4 step=40000[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0032080032110214235, 'time_algorithm_update': 0.005330584406852722, 'loss': 0.0024568766504875386, 'time_step': 0.008632610106468201}[0m [36mstep[0m=[35m40000[0m
[2m2024-09-25 22:12.39[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_40000.d3[0m


Epoch 5/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:14.06[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=5 step=50000[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003232589864730835, 'time_algorithm_update': 0.005374012637138366, 'loss': 0.0020755799458944237, 'time_step': 0.008700587105751037}[0m [36mstep[0m=[35m50000[0m
[2m2024-09-25 22:14.06[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_50000.d3[0m


Epoch 6/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:15.34[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=6 step=60000[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0032271196603775023, 'time_algorithm_update': 0.005369490957260132, 'loss': 0.0018439271912910045, 'time_step': 0.008689930939674377}[0m [36mstep[0m=[35m60000[0m
[2m2024-09-25 22:15.34[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_60000.d3[0m


Epoch 7/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:17.02[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=7 step=70000[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003229240679740906, 'time_algorithm_update': 0.005364405751228332, 'loss': 0.0016699045108282008, 'time_step': 0.008687941265106202}[0m [36mstep[0m=[35m70000[0m
[2m2024-09-25 22:17.02[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_70000.d3[0m


Epoch 8/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:18.29[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=8 step=80000[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0032209672689437864, 'time_algorithm_update': 0.005360100913047791, 'loss': 0.0015471019615069964, 'time_step': 0.008674847126007081}[0m [36mstep[0m=[35m80000[0m
[2m2024-09-25 22:18.29[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_80000.d3[0m


Epoch 9/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:19.56[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=9 step=90000[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0032060807943344117, 'time_algorithm_update': 0.005331685137748718, 'loss': 0.0014552591580955776, 'time_step': 0.008630615639686584}[0m [36mstep[0m=[35m90000[0m
[2m2024-09-25 22:19.56[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_90000.d3[0m


Epoch 10/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:21.24[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=10 step=100000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0032215925455093383, 'time_algorithm_update': 0.005369539070129395, 'loss': 0.0013773127661203034, 'time_step': 0.008684787464141846}[0m [36mstep[0m=[35m100000[0m
[2m2024-09-25 22:21.24[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_100000.d3[0m


Epoch 11/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:22.52[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=11 step=110000[0m [36mepoch[0m=[35m11[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0032638611793518067, 'time_algorithm_update': 0.005414358115196228, 'loss': 0.0013295461985340807, 'time_step': 0.008772272443771363}[0m [36mstep[0m=[35m110000[0m
[2m2024-09-25 22:22.52[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_110000.d3[0m


Epoch 12/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:24.21[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=12 step=120000[0m [36mepoch[0m=[35m12[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003268705916404724, 'time_algorithm_update': 0.005436755609512329, 'loss': 0.0012813476087409072, 'time_step': 0.00879909963607788}[0m [36mstep[0m=[35m120000[0m
[2m2024-09-25 22:24.21[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_120000.d3[0m


Epoch 13/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:25.49[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=13 step=130000[0m [36mepoch[0m=[35m13[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003237057113647461, 'time_algorithm_update': 0.005379828810691834, 'loss': 0.0012384314448339864, 'time_step': 0.008710951662063598}[0m [36mstep[0m=[35m130000[0m
[2m2024-09-25 22:25.49[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_130000.d3[0m


Epoch 14/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:27.16[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=14 step=140000[0m [36mepoch[0m=[35m14[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0032186841011047364, 'time_algorithm_update': 0.005339187216758728, 'loss': 0.0011933996333857067, 'time_step': 0.008651342177391053}[0m [36mstep[0m=[35m140000[0m
[2m2024-09-25 22:27.16[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_140000.d3[0m


Epoch 15/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:28.44[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=15 step=150000[0m [36mepoch[0m=[35m15[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003237862253189087, 'time_algorithm_update': 0.0053791823387145995, 'loss': 0.0011724474836024455, 'time_step': 0.008710360455513001}[0m [36mstep[0m=[35m150000[0m
[2m2024-09-25 22:28.44[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_150000.d3[0m


Epoch 16/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:30.11[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=16 step=160000[0m [36mepoch[0m=[35m16[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0032263838768005373, 'time_algorithm_update': 0.005346194267272949, 'loss': 0.0011310166905110237, 'time_step': 0.008665856194496155}[0m [36mstep[0m=[35m160000[0m
[2m2024-09-25 22:30.11[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_160000.d3[0m


Epoch 17/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:31.39[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=17 step=170000[0m [36mepoch[0m=[35m17[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003220563459396362, 'time_algorithm_update': 0.005337246918678284, 'loss': 0.0011150312115729322, 'time_step': 0.00865113115310669}[0m [36mstep[0m=[35m170000[0m
[2m2024-09-25 22:31.39[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_170000.d3[0m


Epoch 18/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:33.06[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=18 step=180000[0m [36mepoch[0m=[35m18[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0032127987146377563, 'time_algorithm_update': 0.005336316442489624, 'loss': 0.0010917265578871593, 'time_step': 0.008642353391647338}[0m [36mstep[0m=[35m180000[0m
[2m2024-09-25 22:33.06[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_180000.d3[0m


Epoch 19/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:34.33[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=19 step=190000[0m [36mepoch[0m=[35m19[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0031912636518478393, 'time_algorithm_update': 0.00531965491771698, 'loss': 0.0010749607143283357, 'time_step': 0.00860377459526062}[0m [36mstep[0m=[35m190000[0m
[2m2024-09-25 22:34.33[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_190000.d3[0m


Epoch 20/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:36.00[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=20 step=200000[0m [36mepoch[0m=[35m20[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0032279942274093627, 'time_algorithm_update': 0.005350686931610107, 'loss': 0.0010540436443523503, 'time_step': 0.008672449207305909}[0m [36mstep[0m=[35m200000[0m
[2m2024-09-25 22:36.00[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_200000.d3[0m


Epoch 21/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:37.28[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=21 step=210000[0m [36mepoch[0m=[35m21[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0032466507911682128, 'time_algorithm_update': 0.005396039032936097, 'loss': 0.0010360247159085702, 'time_step': 0.008736597561836243}[0m [36mstep[0m=[35m210000[0m
[2m2024-09-25 22:37.28[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_210000.d3[0m


Epoch 22/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:38.56[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=22 step=220000[0m [36mepoch[0m=[35m22[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003241552543640137, 'time_algorithm_update': 0.005371754312515259, 'loss': 0.0010205053977842909, 'time_step': 0.00870711853504181}[0m [36mstep[0m=[35m220000[0m
[2m2024-09-25 22:38.56[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_220000.d3[0m


Epoch 23/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:40.24[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=23 step=230000[0m [36mepoch[0m=[35m23[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003244349646568298, 'time_algorithm_update': 0.005368873977661133, 'loss': 0.0010130959811329376, 'time_step': 0.008707353496551514}[0m [36mstep[0m=[35m230000[0m
[2m2024-09-25 22:40.24[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_230000.d3[0m


Epoch 24/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:41.52[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=24 step=240000[0m [36mepoch[0m=[35m24[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0032441786766052244, 'time_algorithm_update': 0.0053570419311523435, 'loss': 0.0009893658686429261, 'time_step': 0.008700579285621643}[0m [36mstep[0m=[35m240000[0m
[2m2024-09-25 22:41.52[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_240000.d3[0m


Epoch 25/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:43.20[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=25 step=250000[0m [36mepoch[0m=[35m25[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003259989595413208, 'time_algorithm_update': 0.005384440040588379, 'loss': 0.0009835363576130476, 'time_step': 0.008742452383041381}[0m [36mstep[0m=[35m250000[0m
[2m2024-09-25 22:43.20[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_250000.d3[0m


Epoch 26/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:44.49[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=26 step=260000[0m [36mepoch[0m=[35m26[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0032976789236068725, 'time_algorithm_update': 0.00545231158733368, 'loss': 0.000976075198725448, 'time_step': 0.008849395370483399}[0m [36mstep[0m=[35m260000[0m
[2m2024-09-25 22:44.49[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_260000.d3[0m


Epoch 27/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:46.19[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=27 step=270000[0m [36mepoch[0m=[35m27[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003320257019996643, 'time_algorithm_update': 0.005467599964141845, 'loss': 0.0009647658372297883, 'time_step': 0.00888760793209076}[0m [36mstep[0m=[35m270000[0m
[2m2024-09-25 22:46.19[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_270000.d3[0m


Epoch 28/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:47.48[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=28 step=280000[0m [36mepoch[0m=[35m28[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003306185221672058, 'time_algorithm_update': 0.005458228898048401, 'loss': 0.0009514972879667767, 'time_step': 0.008864623284339904}[0m [36mstep[0m=[35m280000[0m
[2m2024-09-25 22:47.48[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_280000.d3[0m


Epoch 29/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:49.16[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=29 step=290000[0m [36mepoch[0m=[35m29[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0032652963638305664, 'time_algorithm_update': 0.005395893335342407, 'loss': 0.0009391738696780522, 'time_step': 0.008760679697990418}[0m [36mstep[0m=[35m290000[0m
[2m2024-09-25 22:49.17[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_290000.d3[0m


Epoch 30/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:50.45[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=30 step=300000[0m [36mepoch[0m=[35m30[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0032862932920455935, 'time_algorithm_update': 0.005404650592803955, 'loss': 0.000928105514566414, 'time_step': 0.008791369032859802}[0m [36mstep[0m=[35m300000[0m
[2m2024-09-25 22:50.45[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_300000.d3[0m


Epoch 31/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:52.09[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=31 step=310000[0m [36mepoch[0m=[35m31[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003084525203704834, 'time_algorithm_update': 0.005152398467063904, 'loss': 0.0009212940548895858, 'time_step': 0.00833358998298645}[0m [36mstep[0m=[35m310000[0m
[2m2024-09-25 22:52.09[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_310000.d3[0m


Epoch 32/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:53.32[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=32 step=320000[0m [36mepoch[0m=[35m32[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003020065641403198, 'time_algorithm_update': 0.005082359075546264, 'loss': 0.0009154097966675181, 'time_step': 0.008198232436180114}[0m [36mstep[0m=[35m320000[0m
[2m2024-09-25 22:53.32[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_320000.d3[0m


Epoch 33/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:54.56[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=33 step=330000[0m [36mepoch[0m=[35m33[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00309816792011261, 'time_algorithm_update': 0.005163767409324646, 'loss': 0.0009117896112904419, 'time_step': 0.008359869813919068}[0m [36mstep[0m=[35m330000[0m
[2m2024-09-25 22:54.56[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_330000.d3[0m


Epoch 34/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:56.21[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=34 step=340000[0m [36mepoch[0m=[35m34[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0031068761348724367, 'time_algorithm_update': 0.0051710670232772826, 'loss': 0.0008939107219805009, 'time_step': 0.008375327897071839}[0m [36mstep[0m=[35m340000[0m
[2m2024-09-25 22:56.21[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_340000.d3[0m


Epoch 35/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:57.44[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=35 step=350000[0m [36mepoch[0m=[35m35[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003051657152175903, 'time_algorithm_update': 0.005101109075546265, 'loss': 0.0008870358535728883, 'time_step': 0.008249075055122375}[0m [36mstep[0m=[35m350000[0m
[2m2024-09-25 22:57.44[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_350000.d3[0m


Epoch 36/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:59.08[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=36 step=360000[0m [36mepoch[0m=[35m36[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003086132836341858, 'time_algorithm_update': 0.005165527629852295, 'loss': 0.0008858619093371089, 'time_step': 0.008349078369140625}[0m [36mstep[0m=[35m360000[0m
[2m2024-09-25 22:59.08[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_360000.d3[0m


Epoch 37/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 23:00.32[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=37 step=370000[0m [36mepoch[0m=[35m37[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003091616106033325, 'time_algorithm_update': 0.005149014782905578, 'loss': 0.0008757491443160689, 'time_step': 0.0083371826171875}[0m [36mstep[0m=[35m370000[0m
[2m2024-09-25 23:00.32[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_370000.d3[0m


Epoch 38/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 23:01.57[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=38 step=380000[0m [36mepoch[0m=[35m38[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0031172500133514404, 'time_algorithm_update': 0.005193886017799377, 'loss': 0.0008715646302211098, 'time_step': 0.008412853360176087}[0m [36mstep[0m=[35m380000[0m
[2m2024-09-25 23:01.57[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_380000.d3[0m


Epoch 39/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 23:03.21[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=39 step=390000[0m [36mepoch[0m=[35m39[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0030867045879364014, 'time_algorithm_update': 0.005144149494171142, 'loss': 0.00086876970041194, 'time_step': 0.008328665041923523}[0m [36mstep[0m=[35m390000[0m
[2m2024-09-25 23:03.21[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_390000.d3[0m


Epoch 40/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 23:04.45[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=40 step=400000[0m [36mepoch[0m=[35m40[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0030882368803024293, 'time_algorithm_update': 0.005148160910606384, 'loss': 0.0008588520028628409, 'time_step': 0.00833329439163208}[0m [36mstep[0m=[35m400000[0m
[2m2024-09-25 23:04.45[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_400000.d3[0m


Epoch 41/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 23:06.10[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=41 step=410000[0m [36mepoch[0m=[35m41[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0031190104961395263, 'time_algorithm_update': 0.005176174759864807, 'loss': 0.0008545692801300901, 'time_step': 0.008392883706092834}[0m [36mstep[0m=[35m410000[0m
[2m2024-09-25 23:06.10[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_410000.d3[0m


Epoch 42/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 23:07.34[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=42 step=420000[0m [36mepoch[0m=[35m42[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0030917845726013186, 'time_algorithm_update': 0.005138249826431274, 'loss': 0.0008532294795993949, 'time_step': 0.008327339792251587}[0m [36mstep[0m=[35m420000[0m
[2m2024-09-25 23:07.34[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_420000.d3[0m


Epoch 43/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 23:08.57[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=43 step=430000[0m [36mepoch[0m=[35m43[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003054075837135315, 'time_algorithm_update': 0.0051222526788711545, 'loss': 0.0008449636031175032, 'time_step': 0.008272829365730285}[0m [36mstep[0m=[35m430000[0m
[2m2024-09-25 23:08.57[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_430000.d3[0m


Epoch 44/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 23:10.21[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=44 step=440000[0m [36mepoch[0m=[35m44[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003074462676048279, 'time_algorithm_update': 0.00514155707359314, 'loss': 0.0008394733605178771, 'time_step': 0.008314297008514404}[0m [36mstep[0m=[35m440000[0m
[2m2024-09-25 23:10.21[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_440000.d3[0m


Epoch 45/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 23:11.44[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=45 step=450000[0m [36mepoch[0m=[35m45[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0030655478715896608, 'time_algorithm_update': 0.005112203454971313, 'loss': 0.0008375885511748493, 'time_step': 0.008274489331245423}[0m [36mstep[0m=[35m450000[0m
[2m2024-09-25 23:11.44[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_450000.d3[0m


Epoch 46/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 23:13.08[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=46 step=460000[0m [36mepoch[0m=[35m46[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0030909646034240723, 'time_algorithm_update': 0.0051400225639343265, 'loss': 0.0008271400350029581, 'time_step': 0.008329026627540589}[0m [36mstep[0m=[35m460000[0m
[2m2024-09-25 23:13.08[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_460000.d3[0m


Epoch 47/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 23:14.32[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=47 step=470000[0m [36mepoch[0m=[35m47[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0030617788076400757, 'time_algorithm_update': 0.005121258735656739, 'loss': 0.0008365016159659717, 'time_step': 0.008280534529685975}[0m [36mstep[0m=[35m470000[0m
[2m2024-09-25 23:14.32[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_470000.d3[0m


Epoch 48/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 23:15.56[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=48 step=480000[0m [36mepoch[0m=[35m48[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003081935882568359, 'time_algorithm_update': 0.00514162392616272, 'loss': 0.0008221910869731801, 'time_step': 0.008321880841255187}[0m [36mstep[0m=[35m480000[0m
[2m2024-09-25 23:15.56[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_480000.d3[0m


Epoch 49/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 23:17.19[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=49 step=490000[0m [36mepoch[0m=[35m49[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0030671722888946534, 'time_algorithm_update': 0.005124243092536927, 'loss': 0.0008192746541986708, 'time_step': 0.008288922452926636}[0m [36mstep[0m=[35m490000[0m
[2m2024-09-25 23:17.19[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_490000.d3[0m


Epoch 50/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 23:18.43[0m [[32m[1minfo     [0m] [1mBC_20240925220648: epoch=50 step=500000[0m [36mepoch[0m=[35m50[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0030686899185180666, 'time_algorithm_update': 0.005139396548271179, 'loss': 0.0008198076920234598, 'time_step': 0.008305620408058167}[0m [36mstep[0m=[35m500000[0m
[2m2024-09-25 23:18.43[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925220648/model_500000.d3[0m


[(1,
  {'time_sample_batch': 0.0031897283554077147,
   'time_algorithm_update': 0.0054560060501098635,
   'loss': 0.013368270827364177,
   'time_step': 0.00874570083618164}),
 (2,
  {'time_sample_batch': 0.0032048604726791384,
   'time_algorithm_update': 0.00530379319190979,
   'loss': 0.004694549460848793,
   'time_step': 0.008605687618255616}),
 (3,
  {'time_sample_batch': 0.003196194291114807,
   'time_algorithm_update': 0.005288543009757995,
   'loss': 0.0031358499483088963,
   'time_step': 0.008579443001747131}),
 (4,
  {'time_sample_batch': 0.0032080032110214235,
   'time_algorithm_update': 0.005330584406852722,
   'loss': 0.0024568766504875386,
   'time_step': 0.008632610106468201}),
 (5,
  {'time_sample_batch': 0.003232589864730835,
   'time_algorithm_update': 0.005374012637138366,
   'loss': 0.0020755799458944237,
   'time_step': 0.008700587105751037}),
 (6,
  {'time_sample_batch': 0.0032271196603775023,
   'time_algorithm_update': 0.005369490957260132,
   'loss': 0.0018439271

In [None]:
!zip -r model_twentieth_dataset.zip d3rlpy_logs

  adding: d3rlpy_logs/ (stored 0%)
  adding: d3rlpy_logs/BC_20240925220648/ (stored 0%)
  adding: d3rlpy_logs/BC_20240925220648/model_90000.d3 (deflated 38%)
  adding: d3rlpy_logs/BC_20240925220648/time_algorithm_update.csv (deflated 60%)
  adding: d3rlpy_logs/BC_20240925220648/time_step.csv (deflated 60%)
  adding: d3rlpy_logs/BC_20240925220648/model_30000.d3 (deflated 24%)
  adding: d3rlpy_logs/BC_20240925220648/model_320000.d3 (deflated 40%)
  adding: d3rlpy_logs/BC_20240925220648/model_440000.d3 (deflated 40%)
  adding: d3rlpy_logs/BC_20240925220648/model_340000.d3 (deflated 40%)
  adding: d3rlpy_logs/BC_20240925220648/model_250000.d3 (deflated 40%)
  adding: d3rlpy_logs/BC_20240925220648/model_160000.d3 (deflated 39%)
  adding: d3rlpy_logs/BC_20240925220648/model_290000.d3 (deflated 40%)
  adding: d3rlpy_logs/BC_20240925220648/model_130000.d3 (deflated 39%)
  adding: d3rlpy_logs/BC_20240925220648/model_500000.d3 (deflated 40%)
  adding: d3rlpy_logs/BC_20240925220648/model_140000.d