 # Install/Import Packages and Download/Unzip Dataset

 ---

 The following code installs d3rlpy, imports all necessary dependencies, and downloads/unzips the dataset from Zenodo.

In [None]:
!pip install d3rlpy==2.6.1 gymnasium==0.29.1

Collecting d3rlpy
  Downloading d3rlpy-2.6.1-py3-none-any.whl.metadata (11 kB)
Collecting gym>=0.26.0 (from d3rlpy)
  Downloading gym-0.26.2.tar.gz (721 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m721.7/721.7 kB[0m [31m27.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting structlog (from d3rlpy)
  Downloading structlog-24.4.0-py3-none-any.whl.metadata (7.3 kB)
Collecting colorama (from d3rlpy)
  Downloading colorama-0.4.6-py2.py3-none-any.whl.metadata (17 kB)
Collecting dataclasses-json (from d3rlpy)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting gymnasium (from d3rlpy)
  Downloading gymnasium-0.29.1-py3-none-any.whl.metadata (10 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json->d3rlpy)
  Downloading marshmallow-3.22.0-py3-none-any.whl.metadata

In [None]:
from google.colab import files
import numpy as np
import d3rlpy
import os

In [None]:
!wget -O full_dataset.zip "https://zenodo.org/record/13830810/files/full_dataset.zip?download=1"

--2024-09-25 21:39:44--  https://zenodo.org/record/13830810/files/full_dataset.zip?download=1
Resolving zenodo.org (zenodo.org)... 188.185.79.172, 188.184.98.238, 188.184.103.159, ...
Connecting to zenodo.org (zenodo.org)|188.185.79.172|:443... connected.
HTTP request sent, awaiting response... 301 MOVED PERMANENTLY
Location: /records/13830810/files/full_dataset.zip [following]
--2024-09-25 21:39:45--  https://zenodo.org/records/13830810/files/full_dataset.zip
Reusing existing connection to zenodo.org:443.
HTTP request sent, awaiting response... 200 OK
Length: 2161582746 (2.0G) [application/octet-stream]
Saving to: ‘full_dataset.zip’


2024-09-25 21:42:43 (11.6 MB/s) - ‘full_dataset.zip’ saved [2161582746/2161582746]



In [None]:
!unzip full_dataset.zip -d /content/
data_dir = '/content/content/data/data'

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: /content/content/data/data/ep_38406.npz  
  inflating: /content/content/data/data/ep_45886.npz  
  inflating: /content/content/data/data/ep_80720.npz  
  inflating: /content/content/data/data/ep_41811.npz  
  inflating: /content/content/data/data/ep_74361.npz  
  inflating: /content/content/data/data/ep_36142.npz  
  inflating: /content/content/data/data/ep_92647.npz  
  inflating: /content/content/data/data/ep_35769.npz  
  inflating: /content/content/data/data/ep_70452.npz  
  inflating: /content/content/data/data/ep_89001.npz  
  inflating: /content/content/data/data/ep_92324.npz  
  inflating: /content/content/data/data/ep_21612.npz  
  inflating: /content/content/data/data/ep_51788.npz  
  inflating: /content/content/data/data/ep_51083.npz  
  inflating: /content/content/data/data/ep_54442.npz  
  inflating: /content/content/data/data/ep_97628.npz  
  inflating: /content/content/data/data/ep_26720.npz  


# Prepare Dataset for d3rlpy

---

The following code loads episodes `ep_1.npz` through `ep_10000.npz` (a subset of the downloaded dataset) into arrays and creates a d3rlpy dataset.

In [None]:
# Episodes are stored one per file as ep_<i>.npz. This cell loads episodes
# 1..NUM_EPISODES and concatenates them into flat, step-indexed arrays.
NUM_EPISODES = 10000
OBSERVATION_SHAPE = (3, 64, 64)  # channel-first layout of the observation buffer
ACTION_SIZE = 3

episode_files = [
    os.path.join(data_dir, f'ep_{i}.npz') for i in range(1, NUM_EPISODES + 1)
]

# First pass: collect only the number of steps in each episode so the buffers
# can be allocated at exactly the required size instead of a guessed upper
# bound (a fixed 5,000,000-step uint8 image buffer reserves roughly 61 GB).
episode_lengths = []
for episode_file in episode_files:
    # The context manager closes the underlying .npz file handle promptly.
    with np.load(episode_file) as episode_data:
        episode_lengths.append(episode_data['terminals'].shape[0])

total_steps = sum(episode_lengths)

all_observations = np.empty((total_steps, *OBSERVATION_SHAPE), dtype=np.uint8)
all_actions = np.empty((total_steps, ACTION_SIZE), dtype=np.float32)
all_rewards = np.empty((total_steps,), dtype=np.float32)
all_terminals = np.empty((total_steps,), dtype=bool)

# Second pass: copy every episode into its slice of the flat buffers.
current_index = 0
for episode_file, num_steps in zip(episode_files, episode_lengths):
    end_index = current_index + num_steps
    with np.load(episode_file) as episode_data:
        # Move axis 3 to position 1 so each frame matches OBSERVATION_SHAPE
        # (presumably stored channel-last as (steps, 64, 64, 3) — confirm).
        all_observations[current_index:end_index] = np.transpose(
            episode_data['observations'], (0, 3, 1, 2)
        )
        all_actions[current_index:end_index] = episode_data['actions']
        all_rewards[current_index:end_index] = episode_data['rewards']
        all_terminals[current_index:end_index] = episode_data['terminals']
    current_index = end_index

# Every slot of the exactly-sized buffers must have been written.
assert current_index == total_steps, (current_index, total_steps)

In [None]:
# Wrap the flat per-step arrays in a d3rlpy MDPDataset. d3rlpy determines the
# signatures, action space and action size from these arrays automatically
# (see the log lines printed below the cell).
mdp_fields = {
    'observations': all_observations,
    'actions': all_actions,
    'rewards': all_rewards,
    'terminals': all_terminals,
}
dataset = d3rlpy.dataset.MDPDataset(**mdp_fields)

[2m2024-09-25 21:48.34[0m [[32m[1minfo     [0m] [1mSignatures have been automatically determined.[0m [36maction_signature[0m=[35mSignature(dtype=[dtype('float32')], shape=[(3,)])[0m [36mobservation_signature[0m=[35mSignature(dtype=[dtype('uint8')], shape=[(3, 64, 64)])[0m [36mreward_signature[0m=[35mSignature(dtype=[dtype('float32')], shape=[(1,)])[0m
[2m2024-09-25 21:48.34[0m [[32m[1minfo     [0m] [1mAction-space has been automatically determined.[0m [36maction_space[0m=[35m<ActionSpace.CONTINUOUS: 1>[0m
[2m2024-09-25 21:48.34[0m [[32m[1minfo     [0m] [1mAction size has been automatically determined.[0m [36maction_size[0m=[35m3[0m


# Model Training

---

The following code trains a behavior cloning algorithm for 50 epochs at 10000 steps per epoch.

In [None]:
# Training schedule: N_EPOCHS epochs of N_STEPS_PER_EPOCH update steps each.
N_EPOCHS = 50
N_STEPS_PER_EPOCH = 10000
SEED = 0

# Fix the random seeds so that repeated runs of the notebook are comparable.
d3rlpy.seed(SEED)

# Behavior cloning with the pixel observation scaler applied to the uint8
# image observations. device='cuda' requires a GPU runtime.
BC = d3rlpy.algos.BCConfig(
    observation_scaler=d3rlpy.preprocessing.PixelObservationScaler(),
).create(device='cuda')

# fit() returns a list of (epoch, metrics) tuples; binding it to a name keeps
# the notebook from echoing the whole list as the cell's output.
training_history = BC.fit(
    dataset=dataset,
    n_steps=N_EPOCHS * N_STEPS_PER_EPOCH,
    n_steps_per_epoch=N_STEPS_PER_EPOCH,
)

[2m2024-09-25 21:48.35[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('uint8')], shape=[(3, 64, 64)]), action_signature=Signature(dtype=[dtype('float32')], shape=[(3,)]), reward_signature=Signature(dtype=[dtype('float32')], shape=[(1,)]), action_space=<ActionSpace.CONTINUOUS: 1>, action_size=3)[0m
[2m2024-09-25 21:48.35[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/BC_20240925214835[0m
[2m2024-09-25 21:48.35[0m [[32m[1mdebug    [0m] [1mBuilding models...            [0m
[2m2024-09-25 21:48.36[0m [[32m[1mdebug    [0m] [1mModels have been built.       [0m
[2m2024-09-25 21:48.36[0m [[32m[1minfo     [0m] [1mParameters                    [0m [36mparams[0m=[35m{'observation_shape': [3, 64, 64], 'action_size': 3, 'config': {'type': 'bc', 'params': {'batch_size': 100, 'gamma': 0.99, 'observation_scaler': {'type': 'pixel', 'params': {}}, 'act

Epoch 1/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:50.04[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=1 step=10000[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0031838195085525513, 'time_algorithm_update': 0.005411988949775696, 'loss': 0.014970203944668173, 'time_step': 0.00869471035003662}[0m [36mstep[0m=[35m10000[0m
[2m2024-09-25 21:50.04[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_10000.d3[0m


Epoch 2/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:51.30[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=2 step=20000[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0031653521299362182, 'time_algorithm_update': 0.005235780000686646, 'loss': 0.0069501314929220825, 'time_step': 0.008497553396224976}[0m [36mstep[0m=[35m20000[0m
[2m2024-09-25 21:51.30[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_20000.d3[0m


Epoch 3/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:52.55[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=3 step=30000[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003161275577545166, 'time_algorithm_update': 0.005229719948768616, 'loss': 0.0051841985889012, 'time_step': 0.008485549902915954}[0m [36mstep[0m=[35m30000[0m
[2m2024-09-25 21:52.55[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_30000.d3[0m


Epoch 4/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:54.24[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=4 step=40000[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003263209319114685, 'time_algorithm_update': 0.005396114301681519, 'loss': 0.004278661334328354, 'time_step': 0.008754119086265564}[0m [36mstep[0m=[35m40000[0m
[2m2024-09-25 21:54.24[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_40000.d3[0m


Epoch 5/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:55.52[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=5 step=50000[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003275758147239685, 'time_algorithm_update': 0.0054149534940719606, 'loss': 0.003696728504204657, 'time_step': 0.008785446071624755}[0m [36mstep[0m=[35m50000[0m
[2m2024-09-25 21:55.52[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_50000.d3[0m


Epoch 6/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:57.21[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=6 step=60000[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003268582773208618, 'time_algorithm_update': 0.005420827221870423, 'loss': 0.0033371019818121566, 'time_step': 0.008784217810630799}[0m [36mstep[0m=[35m60000[0m
[2m2024-09-25 21:57.21[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_60000.d3[0m


Epoch 7/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:58.50[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=7 step=70000[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003283676600456238, 'time_algorithm_update': 0.005421474599838257, 'loss': 0.0030928311260067856, 'time_step': 0.008799729108810425}[0m [36mstep[0m=[35m70000[0m
[2m2024-09-25 21:58.50[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_70000.d3[0m


Epoch 8/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:00.17[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=8 step=80000[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0032258246421813966, 'time_algorithm_update': 0.005353373646736145, 'loss': 0.0028922974045039154, 'time_step': 0.008673968935012818}[0m [36mstep[0m=[35m80000[0m
[2m2024-09-25 22:00.17[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_80000.d3[0m


Epoch 9/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:01.45[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=9 step=90000[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00324236307144165, 'time_algorithm_update': 0.005360935354232788, 'loss': 0.002740842607524246, 'time_step': 0.008697488141059875}[0m [36mstep[0m=[35m90000[0m
[2m2024-09-25 22:01.45[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_90000.d3[0m


Epoch 10/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:03.14[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=10 step=100000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003292630171775818, 'time_algorithm_update': 0.005429200458526611, 'loss': 0.002633773699996527, 'time_step': 0.008816345047950744}[0m [36mstep[0m=[35m100000[0m
[2m2024-09-25 22:03.14[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_100000.d3[0m


Epoch 11/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:04.42[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=11 step=110000[0m [36mepoch[0m=[35m11[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0032811252355575563, 'time_algorithm_update': 0.005411161923408508, 'loss': 0.002524056645878591, 'time_step': 0.008786295461654664}[0m [36mstep[0m=[35m110000[0m
[2m2024-09-25 22:04.42[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_110000.d3[0m


Epoch 12/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:06.11[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=12 step=120000[0m [36mepoch[0m=[35m12[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0033060031414031982, 'time_algorithm_update': 0.005436186408996582, 'loss': 0.002450990511814598, 'time_step': 0.008837490582466126}[0m [36mstep[0m=[35m120000[0m
[2m2024-09-25 22:06.11[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_120000.d3[0m


Epoch 13/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:07.40[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=13 step=130000[0m [36mepoch[0m=[35m13[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0032745336532592774, 'time_algorithm_update': 0.0054129885196685795, 'loss': 0.002379483258386608, 'time_step': 0.008780745339393615}[0m [36mstep[0m=[35m130000[0m
[2m2024-09-25 22:07.40[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_130000.d3[0m


Epoch 14/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:09.07[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=14 step=140000[0m [36mepoch[0m=[35m14[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003200698709487915, 'time_algorithm_update': 0.005292260456085205, 'loss': 0.0023158129666699095, 'time_step': 0.008586254525184632}[0m [36mstep[0m=[35m140000[0m
[2m2024-09-25 22:09.07[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_140000.d3[0m


Epoch 15/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:10.34[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=15 step=150000[0m [36mepoch[0m=[35m15[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0032206162214279176, 'time_algorithm_update': 0.005323385405540467, 'loss': 0.002273557998391334, 'time_step': 0.00863715980052948}[0m [36mstep[0m=[35m150000[0m
[2m2024-09-25 22:10.34[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_150000.d3[0m


Epoch 16/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:12.03[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=16 step=160000[0m [36mepoch[0m=[35m16[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0032944633007049562, 'time_algorithm_update': 0.0054304610252380375, 'loss': 0.0022229082095320337, 'time_step': 0.00881906898021698}[0m [36mstep[0m=[35m160000[0m
[2m2024-09-25 22:12.03[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_160000.d3[0m


Epoch 17/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:13.30[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=17 step=170000[0m [36mepoch[0m=[35m17[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0032277889728546142, 'time_algorithm_update': 0.005326460456848144, 'loss': 0.0021846204858273267, 'time_step': 0.008647836208343506}[0m [36mstep[0m=[35m170000[0m
[2m2024-09-25 22:13.30[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_170000.d3[0m


Epoch 18/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:14.58[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=18 step=180000[0m [36mepoch[0m=[35m18[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0032592554330825806, 'time_algorithm_update': 0.005366049218177796, 'loss': 0.0021545191331068052, 'time_step': 0.008719237089157105}[0m [36mstep[0m=[35m180000[0m
[2m2024-09-25 22:14.58[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_180000.d3[0m


Epoch 19/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:16.28[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=19 step=190000[0m [36mepoch[0m=[35m19[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003345963501930237, 'time_algorithm_update': 0.0054786461353302, 'loss': 0.0021049991878680885, 'time_step': 0.008919927644729615}[0m [36mstep[0m=[35m190000[0m
[2m2024-09-25 22:16.28[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_190000.d3[0m


Epoch 20/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:17.57[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=20 step=200000[0m [36mepoch[0m=[35m20[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003302502369880676, 'time_algorithm_update': 0.0054437844514846805, 'loss': 0.00207107840286335, 'time_step': 0.008841292214393616}[0m [36mstep[0m=[35m200000[0m
[2m2024-09-25 22:17.57[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_200000.d3[0m


Epoch 21/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:19.23[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=21 step=210000[0m [36mepoch[0m=[35m21[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0031882611751556395, 'time_algorithm_update': 0.005267761516571045, 'loss': 0.0020552124604349955, 'time_step': 0.008549983048439026}[0m [36mstep[0m=[35m210000[0m
[2m2024-09-25 22:19.23[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_210000.d3[0m


Epoch 22/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:20.50[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=22 step=220000[0m [36mepoch[0m=[35m22[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003226104021072388, 'time_algorithm_update': 0.005338523459434509, 'loss': 0.0020165411327383483, 'time_step': 0.008659071898460389}[0m [36mstep[0m=[35m220000[0m
[2m2024-09-25 22:20.50[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_220000.d3[0m


Epoch 23/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:22.19[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=23 step=230000[0m [36mepoch[0m=[35m23[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0032922091007232665, 'time_algorithm_update': 0.005419749402999878, 'loss': 0.0020062380300834774, 'time_step': 0.00880686423778534}[0m [36mstep[0m=[35m230000[0m
[2m2024-09-25 22:22.19[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_230000.d3[0m


Epoch 24/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:23.48[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=24 step=240000[0m [36mepoch[0m=[35m24[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0032986714601516724, 'time_algorithm_update': 0.005420556426048279, 'loss': 0.0019792984400410205, 'time_step': 0.00881349573135376}[0m [36mstep[0m=[35m240000[0m
[2m2024-09-25 22:23.48[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_240000.d3[0m


Epoch 25/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:25.17[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=25 step=250000[0m [36mepoch[0m=[35m25[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0032974958181381225, 'time_algorithm_update': 0.005442364001274109, 'loss': 0.0019577084185904824, 'time_step': 0.00883442554473877}[0m [36mstep[0m=[35m250000[0m
[2m2024-09-25 22:25.17[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_250000.d3[0m


Epoch 26/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:26.46[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=26 step=260000[0m [36mepoch[0m=[35m26[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003308743715286255, 'time_algorithm_update': 0.005431873488426209, 'loss': 0.0019444295328110456, 'time_step': 0.008835584497451782}[0m [36mstep[0m=[35m260000[0m
[2m2024-09-25 22:26.46[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_260000.d3[0m


Epoch 27/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:28.15[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=27 step=270000[0m [36mepoch[0m=[35m27[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0032815353870391845, 'time_algorithm_update': 0.005414558219909668, 'loss': 0.0019301561183994636, 'time_step': 0.008791773009300232}[0m [36mstep[0m=[35m270000[0m
[2m2024-09-25 22:28.15[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_270000.d3[0m


Epoch 28/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:29.41[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=28 step=280000[0m [36mepoch[0m=[35m28[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0031774933099746705, 'time_algorithm_update': 0.005270074272155762, 'loss': 0.0019128877511364409, 'time_step': 0.008541326332092284}[0m [36mstep[0m=[35m280000[0m
[2m2024-09-25 22:29.41[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_280000.d3[0m


Epoch 29/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:31.07[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=29 step=290000[0m [36mepoch[0m=[35m29[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0031819976568222045, 'time_algorithm_update': 0.005268276286125183, 'loss': 0.0018916462942725047, 'time_step': 0.008544596600532531}[0m [36mstep[0m=[35m290000[0m
[2m2024-09-25 22:31.07[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_290000.d3[0m


Epoch 30/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:32.33[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=30 step=300000[0m [36mepoch[0m=[35m30[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003175442624092102, 'time_algorithm_update': 0.005266555500030518, 'loss': 0.001872131968132453, 'time_step': 0.008535158228874207}[0m [36mstep[0m=[35m300000[0m
[2m2024-09-25 22:32.33[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_300000.d3[0m


Epoch 31/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:34.00[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=31 step=310000[0m [36mepoch[0m=[35m31[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003182836651802063, 'time_algorithm_update': 0.005261567187309265, 'loss': 0.0018557188749546184, 'time_step': 0.008538687252998353}[0m [36mstep[0m=[35m310000[0m
[2m2024-09-25 22:34.00[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_310000.d3[0m


Epoch 32/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:35.26[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=32 step=320000[0m [36mepoch[0m=[35m32[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003182061815261841, 'time_algorithm_update': 0.005271529912948609, 'loss': 0.0018449689696310087, 'time_step': 0.00854739785194397}[0m [36mstep[0m=[35m320000[0m
[2m2024-09-25 22:35.26[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_320000.d3[0m


Epoch 33/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:36.52[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=33 step=330000[0m [36mepoch[0m=[35m33[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003188136625289917, 'time_algorithm_update': 0.005276934027671814, 'loss': 0.00183798296431778, 'time_step': 0.008559712219238281}[0m [36mstep[0m=[35m330000[0m
[2m2024-09-25 22:36.52[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_330000.d3[0m


Epoch 34/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:38.18[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=34 step=340000[0m [36mepoch[0m=[35m34[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003183746099472046, 'time_algorithm_update': 0.005264061737060547, 'loss': 0.0018103045554831625, 'time_step': 0.008540675973892213}[0m [36mstep[0m=[35m340000[0m
[2m2024-09-25 22:38.18[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_340000.d3[0m


Epoch 35/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:39.44[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=35 step=350000[0m [36mepoch[0m=[35m35[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0031844319581985476, 'time_algorithm_update': 0.005256108164787292, 'loss': 0.0018231679493328557, 'time_step': 0.008533996963500977}[0m [36mstep[0m=[35m350000[0m
[2m2024-09-25 22:39.44[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_350000.d3[0m


Epoch 36/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:41.11[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=36 step=360000[0m [36mepoch[0m=[35m36[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00319351110458374, 'time_algorithm_update': 0.005284977912902832, 'loss': 0.0017924998553702609, 'time_step': 0.00857215654850006}[0m [36mstep[0m=[35m360000[0m
[2m2024-09-25 22:41.11[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_360000.d3[0m


Epoch 37/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:42.37[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=37 step=370000[0m [36mepoch[0m=[35m37[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003190884566307068, 'time_algorithm_update': 0.005276094961166382, 'loss': 0.0017860891702934168, 'time_step': 0.008560590982437133}[0m [36mstep[0m=[35m370000[0m
[2m2024-09-25 22:42.37[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_370000.d3[0m


Epoch 38/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:44.03[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=38 step=380000[0m [36mepoch[0m=[35m38[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0031891957759857177, 'time_algorithm_update': 0.005270459961891174, 'loss': 0.0017778900633216835, 'time_step': 0.00855359423160553}[0m [36mstep[0m=[35m380000[0m
[2m2024-09-25 22:44.03[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_380000.d3[0m


Epoch 39/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:45.30[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=39 step=390000[0m [36mepoch[0m=[35m39[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0032019293785095215, 'time_algorithm_update': 0.005287360215187073, 'loss': 0.001769236582494341, 'time_step': 0.00858320140838623}[0m [36mstep[0m=[35m390000[0m
[2m2024-09-25 22:45.30[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_390000.d3[0m


Epoch 40/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:46.56[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=40 step=400000[0m [36mepoch[0m=[35m40[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0031876251459121703, 'time_algorithm_update': 0.005285028147697449, 'loss': 0.0017734937855158933, 'time_step': 0.00856611909866333}[0m [36mstep[0m=[35m400000[0m
[2m2024-09-25 22:46.56[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_400000.d3[0m


Epoch 41/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:48.23[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=41 step=410000[0m [36mepoch[0m=[35m41[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003193825149536133, 'time_algorithm_update': 0.0052756954431533816, 'loss': 0.0017595505047240294, 'time_step': 0.008563565826416016}[0m [36mstep[0m=[35m410000[0m
[2m2024-09-25 22:48.23[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_410000.d3[0m


Epoch 42/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:49.49[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=42 step=420000[0m [36mepoch[0m=[35m42[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0031873313188552858, 'time_algorithm_update': 0.0052672836780548096, 'loss': 0.0017429948640288785, 'time_step': 0.008548065900802612}[0m [36mstep[0m=[35m420000[0m
[2m2024-09-25 22:49.49[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_420000.d3[0m


Epoch 43/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:51.15[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=43 step=430000[0m [36mepoch[0m=[35m43[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003201262974739075, 'time_algorithm_update': 0.005278777408599854, 'loss': 0.0017439761573798023, 'time_step': 0.008574278116226196}[0m [36mstep[0m=[35m430000[0m
[2m2024-09-25 22:51.15[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_430000.d3[0m


Epoch 44/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:52.42[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=44 step=440000[0m [36mepoch[0m=[35m44[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0031901435852050783, 'time_algorithm_update': 0.005287797927856446, 'loss': 0.0017275681670056657, 'time_step': 0.00857188539505005}[0m [36mstep[0m=[35m440000[0m
[2m2024-09-25 22:52.42[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_440000.d3[0m


Epoch 45/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:54.08[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=45 step=450000[0m [36mepoch[0m=[35m45[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00321422758102417, 'time_algorithm_update': 0.005281865310668946, 'loss': 0.001723025001445785, 'time_step': 0.008589842200279236}[0m [36mstep[0m=[35m450000[0m
[2m2024-09-25 22:54.08[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_450000.d3[0m


Epoch 46/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:55.35[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=46 step=460000[0m [36mepoch[0m=[35m46[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003187601947784424, 'time_algorithm_update': 0.005271096563339233, 'loss': 0.0017186768878251315, 'time_step': 0.008552875685691833}[0m [36mstep[0m=[35m460000[0m
[2m2024-09-25 22:55.35[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_460000.d3[0m


Epoch 47/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:57.01[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=47 step=470000[0m [36mepoch[0m=[35m47[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003206283617019653, 'time_algorithm_update': 0.005260637164115906, 'loss': 0.0017064968681312167, 'time_step': 0.008560397267341613}[0m [36mstep[0m=[35m470000[0m
[2m2024-09-25 22:57.01[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_470000.d3[0m


Epoch 48/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:58.27[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=48 step=480000[0m [36mepoch[0m=[35m48[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003196132183074951, 'time_algorithm_update': 0.0052521427869796754, 'loss': 0.0016977629816683475, 'time_step': 0.008541935729980469}[0m [36mstep[0m=[35m480000[0m
[2m2024-09-25 22:58.27[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_480000.d3[0m


Epoch 49/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:59.54[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=49 step=490000[0m [36mepoch[0m=[35m49[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0032192606925964358, 'time_algorithm_update': 0.005270116591453552, 'loss': 0.0016924618362914772, 'time_step': 0.00858362045288086}[0m [36mstep[0m=[35m490000[0m
[2m2024-09-25 22:59.54[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_490000.d3[0m


Epoch 50/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 23:01.22[0m [[32m[1minfo     [0m] [1mBC_20240925214835: epoch=50 step=500000[0m [36mepoch[0m=[35m50[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0032856996059417725, 'time_algorithm_update': 0.005364153575897217, 'loss': 0.001694572035188321, 'time_step': 0.008745985579490662}[0m [36mstep[0m=[35m500000[0m
[2m2024-09-25 23:01.22[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925214835/model_500000.d3[0m


[(1,
  {'time_sample_batch': 0.0031838195085525513,
   'time_algorithm_update': 0.005411988949775696,
   'loss': 0.014970203944668173,
   'time_step': 0.00869471035003662}),
 (2,
  {'time_sample_batch': 0.0031653521299362182,
   'time_algorithm_update': 0.005235780000686646,
   'loss': 0.0069501314929220825,
   'time_step': 0.008497553396224976}),
 (3,
  {'time_sample_batch': 0.003161275577545166,
   'time_algorithm_update': 0.005229719948768616,
   'loss': 0.0051841985889012,
   'time_step': 0.008485549902915954}),
 (4,
  {'time_sample_batch': 0.003263209319114685,
   'time_algorithm_update': 0.005396114301681519,
   'loss': 0.004278661334328354,
   'time_step': 0.008754119086265564}),
 (5,
  {'time_sample_batch': 0.003275758147239685,
   'time_algorithm_update': 0.0054149534940719606,
   'loss': 0.003696728504204657,
   'time_step': 0.008785446071624755}),
 (6,
  {'time_sample_batch': 0.003268582773208618,
   'time_algorithm_update': 0.005420827221870423,
   'loss': 0.003337101981812

In [None]:
!zip -r model_tenth_dataset.zip d3rlpy_logs

  adding: d3rlpy_logs/ (stored 0%)
  adding: d3rlpy_logs/BC_20240925214835/ (stored 0%)
  adding: d3rlpy_logs/BC_20240925214835/model_90000.d3 (deflated 37%)
  adding: d3rlpy_logs/BC_20240925214835/time_algorithm_update.csv (deflated 60%)
  adding: d3rlpy_logs/BC_20240925214835/time_step.csv (deflated 59%)
  adding: d3rlpy_logs/BC_20240925214835/model_30000.d3 (deflated 24%)
  adding: d3rlpy_logs/BC_20240925214835/model_320000.d3 (deflated 38%)
  adding: d3rlpy_logs/BC_20240925214835/model_440000.d3 (deflated 38%)
  adding: d3rlpy_logs/BC_20240925214835/model_340000.d3 (deflated 38%)
  adding: d3rlpy_logs/BC_20240925214835/model_250000.d3 (deflated 38%)
  adding: d3rlpy_logs/BC_20240925214835/model_160000.d3 (deflated 38%)
  adding: d3rlpy_logs/BC_20240925214835/model_290000.d3 (deflated 38%)
  adding: d3rlpy_logs/BC_20240925214835/model_130000.d3 (deflated 38%)
  adding: d3rlpy_logs/BC_20240925214835/model_500000.d3 (deflated 38%)
  adding: d3rlpy_logs/BC_20240925214835/model_140000.d