 # Install/Import Packages and Download/Unzip Dataset

 ---

 The following code installs pinned versions of d3rlpy and gymnasium, imports the required packages, and downloads and unzips the dataset (a roughly 2 GB archive) from Zenodo.

In [None]:
!pip install d3rlpy==2.6.1 gymnasium==0.29.1

Collecting d3rlpy
  Downloading d3rlpy-2.6.1-py3-none-any.whl.metadata (11 kB)
Collecting gym>=0.26.0 (from d3rlpy)
  Downloading gym-0.26.2.tar.gz (721 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m721.7/721.7 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting structlog (from d3rlpy)
  Downloading structlog-24.4.0-py3-none-any.whl.metadata (7.3 kB)
Collecting colorama (from d3rlpy)
  Downloading colorama-0.4.6-py2.py3-none-any.whl.metadata (17 kB)
Collecting dataclasses-json (from d3rlpy)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting gymnasium (from d3rlpy)
  Downloading gymnasium-0.29.1-py3-none-any.whl.metadata (10 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json->d3rlpy)
  Downloading marshmallow-3.22.0-py3-none-any.whl.metadata 

In [None]:
from google.colab import files
import numpy as np
import d3rlpy
import os

In [None]:
!wget -O full_dataset.zip "https://zenodo.org/record/13830810/files/full_dataset.zip?download=1"

--2024-09-25 20:18:24--  https://zenodo.org/record/13830810/files/full_dataset.zip?download=1
Resolving zenodo.org (zenodo.org)... 188.184.98.238, 188.184.103.159, 188.185.79.172, ...
Connecting to zenodo.org (zenodo.org)|188.184.98.238|:443... connected.
HTTP request sent, awaiting response... 301 MOVED PERMANENTLY
Location: /records/13830810/files/full_dataset.zip [following]
--2024-09-25 20:18:25--  https://zenodo.org/records/13830810/files/full_dataset.zip
Reusing existing connection to zenodo.org:443.
HTTP request sent, awaiting response... 200 OK
Length: 2161582746 (2.0G) [application/octet-stream]
Saving to: ‘full_dataset.zip’


2024-09-25 20:19:55 (22.8 MB/s) - ‘full_dataset.zip’ saved [2161582746/2161582746]



In [None]:
!unzip full_dataset.zip -d /content/
data_dir = '/content/content/data/data'

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: /content/content/data/data/ep_38406.npz  
  inflating: /content/content/data/data/ep_45886.npz  
  inflating: /content/content/data/data/ep_80720.npz  
  inflating: /content/content/data/data/ep_41811.npz  
  inflating: /content/content/data/data/ep_74361.npz  
  inflating: /content/content/data/data/ep_36142.npz  
  inflating: /content/content/data/data/ep_92647.npz  
  inflating: /content/content/data/data/ep_35769.npz  
  inflating: /content/content/data/data/ep_70452.npz  
  inflating: /content/content/data/data/ep_89001.npz  
  inflating: /content/content/data/data/ep_92324.npz  
  inflating: /content/content/data/data/ep_21612.npz  
  inflating: /content/content/data/data/ep_51788.npz  
  inflating: /content/content/data/data/ep_51083.npz  
  inflating: /content/content/data/data/ep_54442.npz  
  inflating: /content/content/data/data/ep_97628.npz  
  inflating: /content/content/data/data/ep_26720.npz  


# Prepare Dataset for d3rlpy

---

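The following code loads every episode file (`ep_<i>.npz`) into preallocated NumPy arrays, reordering the image observations from channel-last to channel-first, and then wraps the arrays in a d3rlpy `MDPDataset`.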

In [None]:
# Capacity of the preallocated buffers: an upper bound on the number of steps
# summed over all episodes. The buffers are trimmed to the real size below.
total_steps = 5000000
# Episode files are named ep_1.npz ... ep_<num_episodes>.npz inside data_dir.
num_episodes = 99522

all_observations = np.empty((total_steps, 3, 64, 64), dtype=np.uint8)
all_actions = np.empty((total_steps, 3), dtype=np.float32)
all_rewards = np.empty((total_steps,), dtype=np.float32)
all_terminals = np.empty((total_steps,), dtype=bool)

# Write cursor: index of the next free row in the buffers.
current_index = 0

for i in range(1, num_episodes + 1):
    episode_file = os.path.join(data_dir, f'ep_{i}.npz')

    # Use the archive as a context manager so its file handle is closed right
    # away instead of lingering until garbage collection, and read each array
    # exactly once (every key access re-reads it from the archive).
    with np.load(episode_file) as episode_data:
        observations = episode_data['observations']
        actions = episode_data['actions']
        rewards = episode_data['rewards']
        terminals = episode_data['terminals']

    num_steps = terminals.shape[0]
    end_index = current_index + num_steps

    # Fail with an explicit message rather than an opaque broadcast error (or
    # a silently dropped step) when the preallocated capacity is too small.
    if end_index > total_steps:
        raise ValueError(
            f'{episode_file} needs rows up to {end_index}, but the buffers '
            f'only hold total_steps={total_steps}; increase total_steps.'
        )

    # Stored frames are channel-last; the buffer is channel-first (N, 3, 64, 64).
    all_observations[current_index:end_index] = np.transpose(observations, (0, 3, 1, 2))
    all_actions[current_index:end_index] = actions
    all_rewards[current_index:end_index] = rewards
    all_terminals[current_index:end_index] = terminals

    current_index = end_index

# Drop the unused tail of each buffer.
all_observations = all_observations[:current_index]
all_actions = all_actions[:current_index]
all_rewards = all_rewards[:current_index]
all_terminals = all_terminals[:current_index]

In [None]:
# Bundle the flat per-step arrays under the keyword names MDPDataset expects;
# episode boundaries are conveyed by the `terminals` flags. No signature or
# action-space arguments are given, so d3rlpy determines them automatically.
episode_arrays = {
    'observations': all_observations,
    'actions': all_actions,
    'rewards': all_rewards,
    'terminals': all_terminals,
}
dataset = d3rlpy.dataset.MDPDataset(**episode_arrays)

[2m2024-09-25 20:34.23[0m [[32m[1minfo     [0m] [1mSignatures have been automatically determined.[0m [36maction_signature[0m=[35mSignature(dtype=[dtype('float32')], shape=[(3,)])[0m [36mobservation_signature[0m=[35mSignature(dtype=[dtype('uint8')], shape=[(3, 64, 64)])[0m [36mreward_signature[0m=[35mSignature(dtype=[dtype('float32')], shape=[(1,)])[0m
[2m2024-09-25 20:34.23[0m [[32m[1minfo     [0m] [1mAction-space has been automatically determined.[0m [36maction_space[0m=[35m<ActionSpace.CONTINUOUS: 1>[0m
[2m2024-09-25 20:34.23[0m [[32m[1minfo     [0m] [1mAction size has been automatically determined.[0m [36maction_size[0m=[35m3[0m


# Model Training

---

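The following code trains a behavior cloning (BC) policy on the image observations for 500,000 update steps (50 epochs of 10,000 steps each); a model checkpoint is saved under `d3rlpy_logs/` after every epoch.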

In [None]:
# Training schedule: 50 epochs of 10000 update steps each (500000 in total).
SEED = 0
N_EPOCHS = 50
N_STEPS_PER_EPOCH = 10000

# Seed the random number generators so repeated runs are comparable.
d3rlpy.seed(SEED)

# Behavior cloning on image observations, using d3rlpy's pixel observation
# scaler; training runs on the GPU.
BC = d3rlpy.algos.BCConfig(
    observation_scaler=d3rlpy.preprocessing.PixelObservationScaler(),
).create(device='cuda')

# fit() returns a list of (epoch, metrics) pairs. Binding it to a name keeps
# the notebook from echoing the whole list as the cell's output while leaving
# the per-epoch metrics available for later inspection.
training_history = BC.fit(
    dataset=dataset,
    n_steps=N_EPOCHS * N_STEPS_PER_EPOCH,
    n_steps_per_epoch=N_STEPS_PER_EPOCH,
)

[2m2024-09-25 20:34.27[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('uint8')], shape=[(3, 64, 64)]), action_signature=Signature(dtype=[dtype('float32')], shape=[(3,)]), reward_signature=Signature(dtype=[dtype('float32')], shape=[(1,)]), action_space=<ActionSpace.CONTINUOUS: 1>, action_size=3)[0m
[2m2024-09-25 20:34.27[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/BC_20240925203427[0m
[2m2024-09-25 20:34.27[0m [[32m[1mdebug    [0m] [1mBuilding models...            [0m
[2m2024-09-25 20:34.29[0m [[32m[1mdebug    [0m] [1mModels have been built.       [0m
[2m2024-09-25 20:34.29[0m [[32m[1minfo     [0m] [1mParameters                    [0m [36mparams[0m=[35m{'observation_shape': [3, 64, 64], 'action_size': 3, 'config': {'type': 'bc', 'params': {'batch_size': 100, 'gamma': 0.99, 'observation_scaler': {'type': 'pixel', 'params': {}}, 'act

Epoch 1/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 20:36.19[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=1 step=10000[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.005085845327377319, 'time_algorithm_update': 0.005729454493522644, 'loss': 0.016795805045915768, 'time_step': 0.010919874453544616}[0m [36mstep[0m=[35m10000[0m
[2m2024-09-25 20:36.19[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_10000.d3[0m


Epoch 2/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 20:37.50[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=2 step=20000[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003478450655937195, 'time_algorithm_update': 0.0054514554977416994, 'loss': 0.009716768994787708, 'time_step': 0.0090264888048172}[0m [36mstep[0m=[35m20000[0m
[2m2024-09-25 20:37.50[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_20000.d3[0m


Epoch 3/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 20:39.22[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=3 step=30000[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0035152724742889404, 'time_algorithm_update': 0.005485492253303528, 'loss': 0.008580144820408895, 'time_step': 0.009100013375282288}[0m [36mstep[0m=[35m30000[0m
[2m2024-09-25 20:39.22[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_30000.d3[0m


Epoch 4/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 20:40.51[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=4 step=40000[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003388635230064392, 'time_algorithm_update': 0.005326024293899536, 'loss': 0.008064004987711086, 'time_step': 0.00881223132610321}[0m [36mstep[0m=[35m40000[0m
[2m2024-09-25 20:40.51[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_40000.d3[0m


Epoch 5/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 20:42.20[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=5 step=50000[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0033934303283691407, 'time_algorithm_update': 0.005344832801818847, 'loss': 0.007780826750583947, 'time_step': 0.008835095381736756}[0m [36mstep[0m=[35m50000[0m
[2m2024-09-25 20:42.20[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_50000.d3[0m


Epoch 6/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 20:43.53[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=6 step=60000[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0035825555562973023, 'time_algorithm_update': 0.0055952815532684325, 'loss': 0.007578831517347135, 'time_step': 0.009278037714958191}[0m [36mstep[0m=[35m60000[0m
[2m2024-09-25 20:43.53[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_60000.d3[0m


Epoch 7/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 20:45.26[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=7 step=70000[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003566793251037598, 'time_algorithm_update': 0.005555289053916931, 'loss': 0.007441544285463169, 'time_step': 0.00922290108203888}[0m [36mstep[0m=[35m70000[0m
[2m2024-09-25 20:45.26[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_70000.d3[0m


Epoch 8/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 20:46.59[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=8 step=80000[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0035489442348480226, 'time_algorithm_update': 0.005521813488006592, 'loss': 0.007323589368793182, 'time_step': 0.009171011519432068}[0m [36mstep[0m=[35m80000[0m
[2m2024-09-25 20:46.59[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_80000.d3[0m


Epoch 9/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 20:48.31[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=9 step=90000[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0035409818172454834, 'time_algorithm_update': 0.005525941157341003, 'loss': 0.0072251499527599666, 'time_step': 0.00916682767868042}[0m [36mstep[0m=[35m90000[0m
[2m2024-09-25 20:48.31[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_90000.d3[0m


Epoch 10/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 20:50.04[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=10 step=100000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003536817812919617, 'time_algorithm_update': 0.005509984564781189, 'loss': 0.00716100256559439, 'time_step': 0.009147153854370118}[0m [36mstep[0m=[35m100000[0m
[2m2024-09-25 20:50.04[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_100000.d3[0m


Epoch 11/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 20:51.37[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=11 step=110000[0m [36mepoch[0m=[35m11[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0035687182426452635, 'time_algorithm_update': 0.005551535177230835, 'loss': 0.007091446132026613, 'time_step': 0.00922123532295227}[0m [36mstep[0m=[35m110000[0m
[2m2024-09-25 20:51.37[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_110000.d3[0m


Epoch 12/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 20:53.10[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=12 step=120000[0m [36mepoch[0m=[35m12[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003583347773551941, 'time_algorithm_update': 0.005564481663703919, 'loss': 0.00702634807403665, 'time_step': 0.009247445940971374}[0m [36mstep[0m=[35m120000[0m
[2m2024-09-25 20:53.10[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_120000.d3[0m


Epoch 13/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 20:54.42[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=13 step=130000[0m [36mepoch[0m=[35m13[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003530401062965393, 'time_algorithm_update': 0.005482872653007507, 'loss': 0.00700454310933128, 'time_step': 0.009112935471534729}[0m [36mstep[0m=[35m130000[0m
[2m2024-09-25 20:54.42[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_130000.d3[0m


Epoch 14/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 20:56.14[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=14 step=140000[0m [36mepoch[0m=[35m14[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003539180016517639, 'time_algorithm_update': 0.005509289598464966, 'loss': 0.006938186130044051, 'time_step': 0.00914825632572174}[0m [36mstep[0m=[35m140000[0m
[2m2024-09-25 20:56.14[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_140000.d3[0m


Epoch 15/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 20:57.46[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=15 step=150000[0m [36mepoch[0m=[35m15[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0035369857549667357, 'time_algorithm_update': 0.005502612257003784, 'loss': 0.006881369880214334, 'time_step': 0.009140145111083985}[0m [36mstep[0m=[35m150000[0m
[2m2024-09-25 20:57.46[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_150000.d3[0m


Epoch 16/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 20:59.18[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=16 step=160000[0m [36mepoch[0m=[35m16[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0035246953725814817, 'time_algorithm_update': 0.00548414888381958, 'loss': 0.006855894709378481, 'time_step': 0.009109569907188416}[0m [36mstep[0m=[35m160000[0m
[2m2024-09-25 20:59.18[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_160000.d3[0m


Epoch 17/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:00.50[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=17 step=170000[0m [36mepoch[0m=[35m17[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0035328163146972655, 'time_algorithm_update': 0.005491970634460449, 'loss': 0.006811480194865726, 'time_step': 0.009125300574302673}[0m [36mstep[0m=[35m170000[0m
[2m2024-09-25 21:00.50[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_170000.d3[0m


Epoch 18/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:02.23[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=18 step=180000[0m [36mepoch[0m=[35m18[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0035443550348281862, 'time_algorithm_update': 0.00551385726928711, 'loss': 0.006790973343513906, 'time_step': 0.009157680487632752}[0m [36mstep[0m=[35m180000[0m
[2m2024-09-25 21:02.23[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_180000.d3[0m


Epoch 19/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:03.55[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=19 step=190000[0m [36mepoch[0m=[35m19[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0035321502923965453, 'time_algorithm_update': 0.005489742851257324, 'loss': 0.006769769777939655, 'time_step': 0.009122297763824463}[0m [36mstep[0m=[35m190000[0m
[2m2024-09-25 21:03.55[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_190000.d3[0m


Epoch 20/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:05.27[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=20 step=200000[0m [36mepoch[0m=[35m20[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003536389708518982, 'time_algorithm_update': 0.00550086669921875, 'loss': 0.006765798450703733, 'time_step': 0.009138674521446228}[0m [36mstep[0m=[35m200000[0m
[2m2024-09-25 21:05.27[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_200000.d3[0m


Epoch 21/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:06.59[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=21 step=210000[0m [36mepoch[0m=[35m21[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0035493433475494384, 'time_algorithm_update': 0.005515239429473877, 'loss': 0.006729416628764011, 'time_step': 0.009165003728866578}[0m [36mstep[0m=[35m210000[0m
[2m2024-09-25 21:06.59[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_210000.d3[0m


Epoch 22/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:08.32[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=22 step=220000[0m [36mepoch[0m=[35m22[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0035540294647216798, 'time_algorithm_update': 0.005517606616020202, 'loss': 0.006688641062146053, 'time_step': 0.009172220253944397}[0m [36mstep[0m=[35m220000[0m
[2m2024-09-25 21:08.32[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_220000.d3[0m


Epoch 23/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:10.04[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=23 step=230000[0m [36mepoch[0m=[35m23[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0035444323301315308, 'time_algorithm_update': 0.00549018702507019, 'loss': 0.0066880716252839196, 'time_step': 0.009135419535636902}[0m [36mstep[0m=[35m230000[0m
[2m2024-09-25 21:10.04[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_230000.d3[0m


Epoch 24/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:11.36[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=24 step=240000[0m [36mepoch[0m=[35m24[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0035478068590164185, 'time_algorithm_update': 0.005525291681289673, 'loss': 0.006660766705102288, 'time_step': 0.009173976087570191}[0m [36mstep[0m=[35m240000[0m
[2m2024-09-25 21:11.36[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_240000.d3[0m


Epoch 25/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:13.09[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=25 step=250000[0m [36mepoch[0m=[35m25[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0035470678091049195, 'time_algorithm_update': 0.005516398048400879, 'loss': 0.006666799467802048, 'time_step': 0.009165027379989624}[0m [36mstep[0m=[35m250000[0m
[2m2024-09-25 21:13.09[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_250000.d3[0m


Epoch 26/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:14.41[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=26 step=260000[0m [36mepoch[0m=[35m26[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003548353099822998, 'time_algorithm_update': 0.005524525880813599, 'loss': 0.006612633591657505, 'time_step': 0.00917374415397644}[0m [36mstep[0m=[35m260000[0m
[2m2024-09-25 21:14.41[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_260000.d3[0m


Epoch 27/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:16.14[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=27 step=270000[0m [36mepoch[0m=[35m27[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003545440459251404, 'time_algorithm_update': 0.005508361649513244, 'loss': 0.006631519473181106, 'time_step': 0.009154252076148986}[0m [36mstep[0m=[35m270000[0m
[2m2024-09-25 21:16.14[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_270000.d3[0m


Epoch 28/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:17.48[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=28 step=280000[0m [36mepoch[0m=[35m28[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0036026078939437865, 'time_algorithm_update': 0.005597547340393067, 'loss': 0.006593762882845476, 'time_step': 0.009301654481887817}[0m [36mstep[0m=[35m280000[0m
[2m2024-09-25 21:17.48[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_280000.d3[0m


Epoch 29/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:19.25[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=29 step=290000[0m [36mepoch[0m=[35m29[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0037386781692504882, 'time_algorithm_update': 0.005794065308570862, 'loss': 0.006583747274940833, 'time_step': 0.009637547969818114}[0m [36mstep[0m=[35m290000[0m
[2m2024-09-25 21:19.25[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_290000.d3[0m


Epoch 30/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:20.57[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=30 step=300000[0m [36mepoch[0m=[35m30[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003548429870605469, 'time_algorithm_update': 0.005510919308662415, 'loss': 0.0065772976514650505, 'time_step': 0.009160216617584229}[0m [36mstep[0m=[35m300000[0m
[2m2024-09-25 21:20.57[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_300000.d3[0m


Epoch 31/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:22.31[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=31 step=310000[0m [36mepoch[0m=[35m31[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003608881449699402, 'time_algorithm_update': 0.005586337327957153, 'loss': 0.006532929755351506, 'time_step': 0.009297858953475952}[0m [36mstep[0m=[35m310000[0m
[2m2024-09-25 21:22.31[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_310000.d3[0m


Epoch 32/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:24.04[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=32 step=320000[0m [36mepoch[0m=[35m32[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0035855057716369628, 'time_algorithm_update': 0.005577677512168884, 'loss': 0.006520638443436474, 'time_step': 0.009264845085144044}[0m [36mstep[0m=[35m320000[0m
[2m2024-09-25 21:24.04[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_320000.d3[0m


Epoch 33/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:25.37[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=33 step=330000[0m [36mepoch[0m=[35m33[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0035512564182281496, 'time_algorithm_update': 0.005514996123313904, 'loss': 0.006537680931412615, 'time_step': 0.00916704535484314}[0m [36mstep[0m=[35m330000[0m
[2m2024-09-25 21:25.37[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_330000.d3[0m


Epoch 34/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:27.09[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=34 step=340000[0m [36mepoch[0m=[35m34[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003554733943939209, 'time_algorithm_update': 0.005516303658485412, 'loss': 0.006511858857423067, 'time_step': 0.009172600674629211}[0m [36mstep[0m=[35m340000[0m
[2m2024-09-25 21:27.09[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_340000.d3[0m


Epoch 35/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:28.42[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=35 step=350000[0m [36mepoch[0m=[35m35[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003547036695480347, 'time_algorithm_update': 0.005511889362335205, 'loss': 0.00651439123400487, 'time_step': 0.009158827686309814}[0m [36mstep[0m=[35m350000[0m
[2m2024-09-25 21:28.42[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_350000.d3[0m


Epoch 36/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:30.15[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=36 step=360000[0m [36mepoch[0m=[35m36[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0035740213871002196, 'time_algorithm_update': 0.005527728652954101, 'loss': 0.006477953896392137, 'time_step': 0.009203260612487793}[0m [36mstep[0m=[35m360000[0m
[2m2024-09-25 21:30.15[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_360000.d3[0m


Epoch 37/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:31.47[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=37 step=370000[0m [36mepoch[0m=[35m37[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0035495559692382815, 'time_algorithm_update': 0.00551397397518158, 'loss': 0.006451360551989637, 'time_step': 0.009164960741996766}[0m [36mstep[0m=[35m370000[0m
[2m2024-09-25 21:31.47[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_370000.d3[0m


Epoch 38/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:33.20[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=38 step=380000[0m [36mepoch[0m=[35m38[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003555476212501526, 'time_algorithm_update': 0.005526293182373047, 'loss': 0.006448095747944899, 'time_step': 0.009182513403892516}[0m [36mstep[0m=[35m380000[0m
[2m2024-09-25 21:33.20[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_380000.d3[0m


Epoch 39/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:34.52[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=39 step=390000[0m [36mepoch[0m=[35m39[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003543656039237976, 'time_algorithm_update': 0.005502722978591919, 'loss': 0.006453957942617126, 'time_step': 0.00914675850868225}[0m [36mstep[0m=[35m390000[0m
[2m2024-09-25 21:34.52[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_390000.d3[0m


Epoch 40/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:36.25[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=40 step=400000[0m [36mepoch[0m=[35m40[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0035610430002212526, 'time_algorithm_update': 0.0055240790605545044, 'loss': 0.006462867989949882, 'time_step': 0.00918675856590271}[0m [36mstep[0m=[35m400000[0m
[2m2024-09-25 21:36.25[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_400000.d3[0m


Epoch 41/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:37.57[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=41 step=410000[0m [36mepoch[0m=[35m41[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0035535903453826905, 'time_algorithm_update': 0.0055254107475280765, 'loss': 0.006424007442384027, 'time_step': 0.009181261253356934}[0m [36mstep[0m=[35m410000[0m
[2m2024-09-25 21:37.57[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_410000.d3[0m


Epoch 42/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:39.30[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=42 step=420000[0m [36mepoch[0m=[35m42[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0035643757343292236, 'time_algorithm_update': 0.005543840289115906, 'loss': 0.006439513516100124, 'time_step': 0.009211205792427063}[0m [36mstep[0m=[35m420000[0m
[2m2024-09-25 21:39.30[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_420000.d3[0m


Epoch 43/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:41.02[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=43 step=430000[0m [36mepoch[0m=[35m43[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003554590940475464, 'time_algorithm_update': 0.005500020623207092, 'loss': 0.006431806366355158, 'time_step': 0.009156385946273804}[0m [36mstep[0m=[35m430000[0m
[2m2024-09-25 21:41.02[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_430000.d3[0m


Epoch 44/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:42.35[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=44 step=440000[0m [36mepoch[0m=[35m44[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0035521427869796753, 'time_algorithm_update': 0.005506949639320373, 'loss': 0.006423423914029263, 'time_step': 0.009160315322875976}[0m [36mstep[0m=[35m440000[0m
[2m2024-09-25 21:42.35[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_440000.d3[0m


Epoch 45/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:44.07[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=45 step=450000[0m [36mepoch[0m=[35m45[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0035528393268585206, 'time_algorithm_update': 0.005514139318466187, 'loss': 0.006408340954617597, 'time_step': 0.009168441152572631}[0m [36mstep[0m=[35m450000[0m
[2m2024-09-25 21:44.07[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_450000.d3[0m


Epoch 46/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:45.40[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=46 step=460000[0m [36mepoch[0m=[35m46[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0035499491930007935, 'time_algorithm_update': 0.0054978219509124755, 'loss': 0.00640856314485427, 'time_step': 0.009149144458770752}[0m [36mstep[0m=[35m460000[0m
[2m2024-09-25 21:45.40[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_460000.d3[0m


Epoch 47/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:47.12[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=47 step=470000[0m [36mepoch[0m=[35m47[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003548729944229126, 'time_algorithm_update': 0.0055203977346420284, 'loss': 0.006405851153167896, 'time_step': 0.009170429611206055}[0m [36mstep[0m=[35m470000[0m
[2m2024-09-25 21:47.12[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_470000.d3[0m


Epoch 48/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:48.45[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=48 step=480000[0m [36mepoch[0m=[35m48[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003558056449890137, 'time_algorithm_update': 0.005536171865463257, 'loss': 0.006377150722546503, 'time_step': 0.009196794772148133}[0m [36mstep[0m=[35m480000[0m
[2m2024-09-25 21:48.45[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_480000.d3[0m


Epoch 49/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:50.17[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=49 step=490000[0m [36mepoch[0m=[35m49[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0035537765979766845, 'time_algorithm_update': 0.005517019534111023, 'loss': 0.006368977200239897, 'time_step': 0.009173365831375123}[0m [36mstep[0m=[35m490000[0m
[2m2024-09-25 21:50.17[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_490000.d3[0m


Epoch 50/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:51.50[0m [[32m[1minfo     [0m] [1mBC_20240925203427: epoch=50 step=500000[0m [36mepoch[0m=[35m50[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0035849644899368285, 'time_algorithm_update': 0.005532838296890259, 'loss': 0.006381155525543727, 'time_step': 0.00922063581943512}[0m [36mstep[0m=[35m500000[0m
[2m2024-09-25 21:51.50[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203427/model_500000.d3[0m


[(1,
  {'time_sample_batch': 0.005085845327377319,
   'time_algorithm_update': 0.005729454493522644,
   'loss': 0.016795805045915768,
   'time_step': 0.010919874453544616}),
 (2,
  {'time_sample_batch': 0.003478450655937195,
   'time_algorithm_update': 0.0054514554977416994,
   'loss': 0.009716768994787708,
   'time_step': 0.0090264888048172}),
 (3,
  {'time_sample_batch': 0.0035152724742889404,
   'time_algorithm_update': 0.005485492253303528,
   'loss': 0.008580144820408895,
   'time_step': 0.009100013375282288}),
 (4,
  {'time_sample_batch': 0.003388635230064392,
   'time_algorithm_update': 0.005326024293899536,
   'loss': 0.008064004987711086,
   'time_step': 0.00881223132610321}),
 (5,
  {'time_sample_batch': 0.0033934303283691407,
   'time_algorithm_update': 0.005344832801818847,
   'loss': 0.007780826750583947,
   'time_step': 0.008835095381736756}),
 (6,
  {'time_sample_batch': 0.0035825555562973023,
   'time_algorithm_update': 0.0055952815532684325,
   'loss': 0.00757883151734

In [None]:
!zip -r model_full_dataset.zip d3rlpy_logs

  adding: d3rlpy_logs/ (stored 0%)
  adding: d3rlpy_logs/BC_20240925203427/ (stored 0%)
  adding: d3rlpy_logs/BC_20240925203427/model_90000.d3 (deflated 44%)
  adding: d3rlpy_logs/BC_20240925203427/time_algorithm_update.csv (deflated 60%)
  adding: d3rlpy_logs/BC_20240925203427/time_step.csv (deflated 60%)
  adding: d3rlpy_logs/BC_20240925203427/model_30000.d3 (deflated 27%)
  adding: d3rlpy_logs/BC_20240925203427/model_320000.d3 (deflated 47%)
  adding: d3rlpy_logs/BC_20240925203427/model_440000.d3 (deflated 48%)
  adding: d3rlpy_logs/BC_20240925203427/model_340000.d3 (deflated 47%)
  adding: d3rlpy_logs/BC_20240925203427/model_250000.d3 (deflated 47%)
  adding: d3rlpy_logs/BC_20240925203427/model_160000.d3 (deflated 47%)
  adding: d3rlpy_logs/BC_20240925203427/model_290000.d3 (deflated 47%)
  adding: d3rlpy_logs/BC_20240925203427/model_130000.d3 (deflated 46%)
  adding: d3rlpy_logs/BC_20240925203427/model_500000.d3 (deflated 47%)
  adding: d3rlpy_logs/BC_20240925203427/model_140000.d