 # Install/Import Packages and Download/Unzip Dataset

 ---

 The following code installs d3rlpy, imports all necessary dependencies, and downloads/unzips the dataset from Zenodo.

In [None]:
!pip install d3rlpy==2.6.1 gymnasium==0.29.1

Collecting d3rlpy
  Downloading d3rlpy-2.6.1-py3-none-any.whl.metadata (11 kB)
Collecting gym>=0.26.0 (from d3rlpy)
  Downloading gym-0.26.2.tar.gz (721 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/721.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m721.7/721.7 kB[0m [31m34.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting structlog (from d3rlpy)
  Downloading structlog-24.4.0-py3-none-any.whl.metadata (7.3 kB)
Collecting colorama (from d3rlpy)
  Downloading colorama-0.4.6-py2.py3-none-any.whl.metadata (17 kB)
Collecting dataclasses-json (from d3rlpy)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting gymnasium (from d3rlpy)
  Downloading gymnasium-0.29.1-py3-none-any.whl.metadata (10 kB)
Collecting ma

In [None]:
from google.colab import files
import numpy as np
import d3rlpy
import os

In [None]:
# Download the dataset archive (about 2 GB) from Zenodo record 13830810 and
# save it in the current working directory as full_dataset.zip.
!wget -O full_dataset.zip "https://zenodo.org/record/13830810/files/full_dataset.zip?download=1"

--2024-09-25 20:18:33--  https://zenodo.org/record/13830810/files/full_dataset.zip?download=1
Resolving zenodo.org (zenodo.org)... 188.185.79.172, 188.184.98.238, 188.184.103.159, ...
Connecting to zenodo.org (zenodo.org)|188.185.79.172|:443... connected.
HTTP request sent, awaiting response... 301 MOVED PERMANENTLY
Location: /records/13830810/files/full_dataset.zip [following]
--2024-09-25 20:18:34--  https://zenodo.org/records/13830810/files/full_dataset.zip
Reusing existing connection to zenodo.org:443.
HTTP request sent, awaiting response... 200 OK
Length: 2161582746 (2.0G) [application/octet-stream]
Saving to: ‘full_dataset.zip’


2024-09-25 20:20:49 (15.2 MB/s) - ‘full_dataset.zip’ saved [2161582746/2161582746]



In [None]:
!unzip full_dataset.zip -d /content/
data_dir = '/content/content/data/data'

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: /content/content/data/data/ep_38406.npz  
  inflating: /content/content/data/data/ep_45886.npz  
  inflating: /content/content/data/data/ep_80720.npz  
  inflating: /content/content/data/data/ep_41811.npz  
  inflating: /content/content/data/data/ep_74361.npz  
  inflating: /content/content/data/data/ep_36142.npz  
  inflating: /content/content/data/data/ep_92647.npz  
  inflating: /content/content/data/data/ep_35769.npz  
  inflating: /content/content/data/data/ep_70452.npz  
  inflating: /content/content/data/data/ep_89001.npz  
  inflating: /content/content/data/data/ep_92324.npz  
  inflating: /content/content/data/data/ep_21612.npz  
  inflating: /content/content/data/data/ep_51788.npz  
  inflating: /content/content/data/data/ep_51083.npz  
  inflating: /content/content/data/data/ep_54442.npz  
  inflating: /content/content/data/data/ep_97628.npz  
  inflating: /content/content/data/data/ep_26720.npz  


# Prepare Dataset for d3rlpy

---

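 The following code loads episodes 1–50,000 from the unzipped dataset into preallocated arrays (transposing the observations so that the channel axis comes first) and then creates a d3rlpy dataset from them.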

In [None]:
# Load a fixed range of episode files into preallocated, contiguous arrays.
num_episodes = 50000   # files ep_1.npz ... ep_50000.npz are read
total_steps = 5000000  # capacity of the preallocated buffers, in steps

# Buffers are allocated once up front and filled in place; np.empty leaves the
# contents uninitialised, so the unused tail is trimmed off after the loop.
all_observations = np.empty((total_steps, 3, 64, 64), dtype=np.uint8)
all_actions = np.empty((total_steps, 3), dtype=np.float32)
all_rewards = np.empty((total_steps,), dtype=np.float32)
all_terminals = np.empty((total_steps,), dtype=bool)

current_index = 0  # next free row in the buffers

for i in range(1, num_episodes + 1):
    episode_file = os.path.join(data_dir, f'ep_{i}.npz')

    # np.load on an .npz returns a lazily-read NpzFile that keeps the file
    # open; the context manager closes it as soon as the episode is copied.
    with np.load(episode_file) as episode_data:
        # Read 'terminals' once: every key access on an NpzFile re-reads
        # the array from the archive.
        terminals = episode_data['terminals']
        num_steps = terminals.shape[0]
        end_index = current_index + num_steps

        # Without this check an overflow would surface as an opaque
        # "could not broadcast" ValueError from the slice assignment.
        if end_index > total_steps:
            raise ValueError(
                f'{episode_file} needs rows up to {end_index}, but the '
                f'buffers only hold total_steps={total_steps}'
            )

        # Move the last axis to position 1 to match the (3, 64, 64) buffer
        # layout (presumably the files store frames as (steps, 64, 64, 3)).
        all_observations[current_index:end_index] = np.transpose(
            episode_data['observations'], (0, 3, 1, 2)
        )
        all_actions[current_index:end_index] = episode_data['actions']
        all_rewards[current_index:end_index] = episode_data['rewards']
        all_terminals[current_index:end_index] = terminals

    current_index = end_index

# Drop the unused (uninitialised) tail of each buffer.
all_observations = all_observations[:current_index]
all_actions = all_actions[:current_index]
all_rewards = all_rewards[:current_index]
all_terminals = all_terminals[:current_index]

In [None]:
# Wrap the flat per-step arrays in a d3rlpy MDPDataset. No signatures, action
# space or action size are passed; per the log output below, d3rlpy determines
# them automatically from the arrays (continuous actions, action size 3).
dataset = d3rlpy.dataset.MDPDataset(
    observations=all_observations,  # uint8, one (3, 64, 64) array per step
    actions=all_actions,            # float32, 3 values per step
    rewards=all_rewards,            # float32, one value per step
    terminals=all_terminals,        # bool, one flag per step
)

[2m2024-09-25 20:30.19[0m [[32m[1minfo     [0m] [1mSignatures have been automatically determined.[0m [36maction_signature[0m=[35mSignature(dtype=[dtype('float32')], shape=[(3,)])[0m [36mobservation_signature[0m=[35mSignature(dtype=[dtype('uint8')], shape=[(3, 64, 64)])[0m [36mreward_signature[0m=[35mSignature(dtype=[dtype('float32')], shape=[(1,)])[0m
[2m2024-09-25 20:30.19[0m [[32m[1minfo     [0m] [1mAction-space has been automatically determined.[0m [36maction_space[0m=[35m<ActionSpace.CONTINUOUS: 1>[0m
[2m2024-09-25 20:30.19[0m [[32m[1minfo     [0m] [1mAction size has been automatically determined.[0m [36maction_size[0m=[35m3[0m


# Model Training

---

 The following code trains a behavior cloning (BC) model for 50 epochs of 10,000 steps each (500,000 steps in total); the model parameters are saved after every epoch.

In [None]:
# Training schedule: 50 epochs of 10,000 steps each.
n_epochs = 50
steps_per_epoch = 10000

# Behavior-cloning configuration; the pixel observation scaler is applied to
# the uint8 image observations.
bc_config = d3rlpy.algos.BCConfig(
    observation_scaler=d3rlpy.preprocessing.PixelObservationScaler(),
)

# Build the algorithm on the GPU (requires a CUDA runtime).
BC = bc_config.create(device='cuda')

# fit() is the last expression of the cell, so its return value (the list of
# per-epoch metrics) is displayed as the cell output.
BC.fit(
    dataset=dataset,
    n_steps=n_epochs * steps_per_epoch,
    n_steps_per_epoch=steps_per_epoch,
)

[2m2024-09-25 20:30.21[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('uint8')], shape=[(3, 64, 64)]), action_signature=Signature(dtype=[dtype('float32')], shape=[(3,)]), reward_signature=Signature(dtype=[dtype('float32')], shape=[(1,)]), action_space=<ActionSpace.CONTINUOUS: 1>, action_size=3)[0m
[2m2024-09-25 20:30.21[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/BC_20240925203021[0m
[2m2024-09-25 20:30.21[0m [[32m[1mdebug    [0m] [1mBuilding models...            [0m
[2m2024-09-25 20:30.23[0m [[32m[1mdebug    [0m] [1mModels have been built.       [0m
[2m2024-09-25 20:30.23[0m [[32m[1minfo     [0m] [1mParameters                    [0m [36mparams[0m=[35m{'observation_shape': [3, 64, 64], 'action_size': 3, 'config': {'type': 'bc', 'params': {'batch_size': 100, 'gamma': 0.99, 'observation_scaler': {'type': 'pixel', 'params': {}}, 'act

Epoch 1/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 20:31.45[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=1 step=10000[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028868495225906374, 'time_algorithm_update': 0.0051183199644088745, 'loss': 0.015720033557107674, 'time_step': 0.008097256326675415}[0m [36mstep[0m=[35m10000[0m
[2m2024-09-25 20:31.45[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_10000.d3[0m


Epoch 2/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 20:33.05[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=2 step=20000[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028759174346923828, 'time_algorithm_update': 0.004972385549545288, 'loss': 0.009199370395438746, 'time_step': 0.007936518120765686}[0m [36mstep[0m=[35m20000[0m
[2m2024-09-25 20:33.05[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_20000.d3[0m


Epoch 3/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 20:34.25[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=3 step=30000[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028751471042633057, 'time_algorithm_update': 0.004967793107032776, 'loss': 0.008171565019059926, 'time_step': 0.007931830477714539}[0m [36mstep[0m=[35m30000[0m
[2m2024-09-25 20:34.25[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_30000.d3[0m


Epoch 4/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 20:35.45[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=4 step=40000[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002876189041137695, 'time_algorithm_update': 0.004966652631759644, 'loss': 0.007651704471884295, 'time_step': 0.007932533311843872}[0m [36mstep[0m=[35m40000[0m
[2m2024-09-25 20:35.45[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_40000.d3[0m


Epoch 5/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 20:37.05[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=5 step=50000[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002893471908569336, 'time_algorithm_update': 0.004987011528015136, 'loss': 0.007258199187950231, 'time_step': 0.00796983814239502}[0m [36mstep[0m=[35m50000[0m
[2m2024-09-25 20:37.05[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_50000.d3[0m


Epoch 6/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 20:38.25[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=6 step=60000[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028774746417999267, 'time_algorithm_update': 0.004984899401664734, 'loss': 0.0070141437286743895, 'time_step': 0.007951682996749879}[0m [36mstep[0m=[35m60000[0m
[2m2024-09-25 20:38.25[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_60000.d3[0m


Epoch 7/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 20:39.45[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=7 step=70000[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002876620626449585, 'time_algorithm_update': 0.0049796142578125, 'loss': 0.006802578031271696, 'time_step': 0.007944852375984192}[0m [36mstep[0m=[35m70000[0m
[2m2024-09-25 20:39.45[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_70000.d3[0m


Epoch 8/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 20:41.05[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=8 step=80000[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028805286169052125, 'time_algorithm_update': 0.004994393873214722, 'loss': 0.006648544688243419, 'time_step': 0.007963703322410583}[0m [36mstep[0m=[35m80000[0m
[2m2024-09-25 20:41.05[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_80000.d3[0m


Epoch 9/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 20:42.26[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=9 step=90000[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002877994894981384, 'time_algorithm_update': 0.0049823674201965335, 'loss': 0.0065355535040376705, 'time_step': 0.007948945879936218}[0m [36mstep[0m=[35m90000[0m
[2m2024-09-25 20:42.26[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_90000.d3[0m


Epoch 10/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 20:43.46[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=10 step=100000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028797628164291384, 'time_algorithm_update': 0.004991058444976807, 'loss': 0.006413964353594929, 'time_step': 0.0079596031665802}[0m [36mstep[0m=[35m100000[0m
[2m2024-09-25 20:43.46[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_100000.d3[0m


Epoch 11/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 20:45.06[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=11 step=110000[0m [36mepoch[0m=[35m11[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028915796756744383, 'time_algorithm_update': 0.00498798394203186, 'loss': 0.0063173621759982776, 'time_step': 0.007969861793518067}[0m [36mstep[0m=[35m110000[0m
[2m2024-09-25 20:45.06[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_110000.d3[0m


Epoch 12/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 20:46.26[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=12 step=120000[0m [36mepoch[0m=[35m12[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002881033205986023, 'time_algorithm_update': 0.00498165237903595, 'loss': 0.006222468951228075, 'time_step': 0.007951294422149658}[0m [36mstep[0m=[35m120000[0m
[2m2024-09-25 20:46.26[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_120000.d3[0m


Epoch 13/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 20:47.46[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=13 step=130000[0m [36mepoch[0m=[35m13[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002879912495613098, 'time_algorithm_update': 0.004978754591941833, 'loss': 0.0061936324579175564, 'time_step': 0.007947372317314148}[0m [36mstep[0m=[35m130000[0m
[2m2024-09-25 20:47.46[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_130000.d3[0m


Epoch 14/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 20:49.07[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=14 step=140000[0m [36mepoch[0m=[35m14[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028937625408172606, 'time_algorithm_update': 0.0050075051307678226, 'loss': 0.00608673806199804, 'time_step': 0.007991316843032836}[0m [36mstep[0m=[35m140000[0m
[2m2024-09-25 20:49.07[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_140000.d3[0m


Epoch 15/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 20:50.27[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=15 step=150000[0m [36mepoch[0m=[35m15[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002882441711425781, 'time_algorithm_update': 0.004977071022987366, 'loss': 0.006045961228851229, 'time_step': 0.007948564934730529}[0m [36mstep[0m=[35m150000[0m
[2m2024-09-25 20:50.27[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_150000.d3[0m


Epoch 16/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 20:51.47[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=16 step=160000[0m [36mepoch[0m=[35m16[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028831231355667115, 'time_algorithm_update': 0.004988173484802246, 'loss': 0.005977762926602736, 'time_step': 0.007959271812438965}[0m [36mstep[0m=[35m160000[0m
[2m2024-09-25 20:51.47[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_160000.d3[0m


Epoch 17/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 20:53.08[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=17 step=170000[0m [36mepoch[0m=[35m17[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002894699549674988, 'time_algorithm_update': 0.0049954254150390625, 'loss': 0.005934597861696966, 'time_step': 0.00797893419265747}[0m [36mstep[0m=[35m170000[0m
[2m2024-09-25 20:53.08[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_170000.d3[0m


Epoch 18/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 20:54.28[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=18 step=180000[0m [36mepoch[0m=[35m18[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028822922945022584, 'time_algorithm_update': 0.004975071096420288, 'loss': 0.0058750169517006725, 'time_step': 0.00794644250869751}[0m [36mstep[0m=[35m180000[0m
[2m2024-09-25 20:54.28[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_180000.d3[0m


Epoch 19/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 20:55.48[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=19 step=190000[0m [36mepoch[0m=[35m19[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00289150173664093, 'time_algorithm_update': 0.004986921238899231, 'loss': 0.005824985619308427, 'time_step': 0.00796663658618927}[0m [36mstep[0m=[35m190000[0m
[2m2024-09-25 20:55.48[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_190000.d3[0m


Epoch 20/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 20:57.08[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=20 step=200000[0m [36mepoch[0m=[35m20[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028864365577697753, 'time_algorithm_update': 0.00498783872127533, 'loss': 0.005800969593739137, 'time_step': 0.007963239812850952}[0m [36mstep[0m=[35m200000[0m
[2m2024-09-25 20:57.09[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_200000.d3[0m


Epoch 21/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 20:58.29[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=21 step=210000[0m [36mepoch[0m=[35m21[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002891377544403076, 'time_algorithm_update': 0.004989620137214661, 'loss': 0.005751794103928841, 'time_step': 0.0079703955411911}[0m [36mstep[0m=[35m210000[0m
[2m2024-09-25 20:58.29[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_210000.d3[0m


Epoch 22/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 20:59.49[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=22 step=220000[0m [36mepoch[0m=[35m22[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00289126718044281, 'time_algorithm_update': 0.004996429538726806, 'loss': 0.0057174169937614355, 'time_step': 0.00797601375579834}[0m [36mstep[0m=[35m220000[0m
[2m2024-09-25 20:59.49[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_220000.d3[0m


Epoch 23/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:01.10[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=23 step=230000[0m [36mepoch[0m=[35m23[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00289809136390686, 'time_algorithm_update': 0.004999624752998352, 'loss': 0.0057005834304029125, 'time_step': 0.007987101912498474}[0m [36mstep[0m=[35m230000[0m
[2m2024-09-25 21:01.10[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_230000.d3[0m


Epoch 24/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:02.30[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=24 step=240000[0m [36mepoch[0m=[35m24[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028976077795028685, 'time_algorithm_update': 0.005004309749603271, 'loss': 0.0056556495542638, 'time_step': 0.00799165759086609}[0m [36mstep[0m=[35m240000[0m
[2m2024-09-25 21:02.30[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_240000.d3[0m


Epoch 25/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:03.51[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=25 step=250000[0m [36mepoch[0m=[35m25[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028839039325714113, 'time_algorithm_update': 0.004989472389221192, 'loss': 0.00562981627676636, 'time_step': 0.007962279319763184}[0m [36mstep[0m=[35m250000[0m
[2m2024-09-25 21:03.51[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_250000.d3[0m


Epoch 26/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:05.11[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=26 step=260000[0m [36mepoch[0m=[35m26[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028915598154067995, 'time_algorithm_update': 0.004993664383888244, 'loss': 0.005617067178059369, 'time_step': 0.007975101137161255}[0m [36mstep[0m=[35m260000[0m
[2m2024-09-25 21:05.11[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_260000.d3[0m


Epoch 27/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:06.31[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=27 step=270000[0m [36mepoch[0m=[35m27[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002891999578475952, 'time_algorithm_update': 0.004993215250968933, 'loss': 0.005582685679965653, 'time_step': 0.007974533915519715}[0m [36mstep[0m=[35m270000[0m
[2m2024-09-25 21:06.31[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_270000.d3[0m


Epoch 28/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:07.52[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=28 step=280000[0m [36mepoch[0m=[35m28[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028945730209350585, 'time_algorithm_update': 0.00499413571357727, 'loss': 0.005558481093053706, 'time_step': 0.00797824559211731}[0m [36mstep[0m=[35m280000[0m
[2m2024-09-25 21:07.52[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_280000.d3[0m


Epoch 29/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:09.13[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=29 step=290000[0m [36mepoch[0m=[35m29[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0029152164697647093, 'time_algorithm_update': 0.005013199734687805, 'loss': 0.0055378470700467005, 'time_step': 0.008018548631668091}[0m [36mstep[0m=[35m290000[0m
[2m2024-09-25 21:09.13[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_290000.d3[0m


Epoch 30/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:10.33[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=30 step=300000[0m [36mepoch[0m=[35m30[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028964500427246095, 'time_algorithm_update': 0.004988484001159668, 'loss': 0.005510478465654887, 'time_step': 0.007974238872528076}[0m [36mstep[0m=[35m300000[0m
[2m2024-09-25 21:10.33[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_300000.d3[0m


Epoch 31/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:11.53[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=31 step=310000[0m [36mepoch[0m=[35m31[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002896696090698242, 'time_algorithm_update': 0.004990161490440369, 'loss': 0.0054744249704992394, 'time_step': 0.007976158714294433}[0m [36mstep[0m=[35m310000[0m
[2m2024-09-25 21:11.53[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_310000.d3[0m


Epoch 32/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:13.14[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=32 step=320000[0m [36mepoch[0m=[35m32[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00291691677570343, 'time_algorithm_update': 0.005009738850593567, 'loss': 0.005458622028888203, 'time_step': 0.008017003774642944}[0m [36mstep[0m=[35m320000[0m
[2m2024-09-25 21:13.14[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_320000.d3[0m


Epoch 33/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:14.35[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=33 step=330000[0m [36mepoch[0m=[35m33[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002894664931297302, 'time_algorithm_update': 0.0050009431838989255, 'loss': 0.005440489583951421, 'time_step': 0.007984747433662415}[0m [36mstep[0m=[35m330000[0m
[2m2024-09-25 21:14.35[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_330000.d3[0m


Epoch 34/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:15.55[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=34 step=340000[0m [36mepoch[0m=[35m34[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0029023752212524413, 'time_algorithm_update': 0.005014666986465454, 'loss': 0.005427666074153967, 'time_step': 0.008006637454032899}[0m [36mstep[0m=[35m340000[0m
[2m2024-09-25 21:15.55[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_340000.d3[0m


Epoch 35/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:17.16[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=35 step=350000[0m [36mepoch[0m=[35m35[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002901632285118103, 'time_algorithm_update': 0.005009842467308044, 'loss': 0.005406770682986826, 'time_step': 0.00800188615322113}[0m [36mstep[0m=[35m350000[0m
[2m2024-09-25 21:17.16[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_350000.d3[0m


Epoch 36/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:18.37[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=36 step=360000[0m [36mepoch[0m=[35m36[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028885692834854128, 'time_algorithm_update': 0.004999904632568359, 'loss': 0.005394489792780951, 'time_step': 0.007978286719322205}[0m [36mstep[0m=[35m360000[0m
[2m2024-09-25 21:18.37[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_360000.d3[0m


Epoch 37/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:19.57[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=37 step=370000[0m [36mepoch[0m=[35m37[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002894082999229431, 'time_algorithm_update': 0.005004569888114929, 'loss': 0.005362924561975524, 'time_step': 0.007988834691047669}[0m [36mstep[0m=[35m370000[0m
[2m2024-09-25 21:19.57[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_370000.d3[0m


Epoch 38/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:21.18[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=38 step=380000[0m [36mepoch[0m=[35m38[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028941951990127564, 'time_algorithm_update': 0.004995569896697998, 'loss': 0.005362370292958804, 'time_step': 0.007980266618728638}[0m [36mstep[0m=[35m380000[0m
[2m2024-09-25 21:21.18[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_380000.d3[0m


Epoch 39/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:22.38[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=39 step=390000[0m [36mepoch[0m=[35m39[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028896835803985596, 'time_algorithm_update': 0.005000794196128845, 'loss': 0.005341251284955069, 'time_step': 0.007979707551002503}[0m [36mstep[0m=[35m390000[0m
[2m2024-09-25 21:22.38[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_390000.d3[0m


Epoch 40/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:23.59[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=40 step=400000[0m [36mepoch[0m=[35m40[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002894097399711609, 'time_algorithm_update': 0.005011829924583435, 'loss': 0.0053255080251488835, 'time_step': 0.007996177124977112}[0m [36mstep[0m=[35m400000[0m
[2m2024-09-25 21:23.59[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_400000.d3[0m


Epoch 41/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:25.19[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=41 step=410000[0m [36mepoch[0m=[35m41[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0029083035230636598, 'time_algorithm_update': 0.00501422107219696, 'loss': 0.005315644161868841, 'time_step': 0.008013530850410462}[0m [36mstep[0m=[35m410000[0m
[2m2024-09-25 21:25.19[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_410000.d3[0m


Epoch 42/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:26.40[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=42 step=420000[0m [36mepoch[0m=[35m42[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028970450401306154, 'time_algorithm_update': 0.004996422958374024, 'loss': 0.0052926149639301, 'time_step': 0.007983182287216187}[0m [36mstep[0m=[35m420000[0m
[2m2024-09-25 21:26.40[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_420000.d3[0m


Epoch 43/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:28.01[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=43 step=430000[0m [36mepoch[0m=[35m43[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0029077991008758545, 'time_algorithm_update': 0.005014644050598144, 'loss': 0.005296403180249035, 'time_step': 0.008012501049041748}[0m [36mstep[0m=[35m430000[0m
[2m2024-09-25 21:28.01[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_430000.d3[0m


Epoch 44/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:29.21[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=44 step=440000[0m [36mepoch[0m=[35m44[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0029138297080993653, 'time_algorithm_update': 0.005004776835441589, 'loss': 0.005277136359689757, 'time_step': 0.008009778189659119}[0m [36mstep[0m=[35m440000[0m
[2m2024-09-25 21:29.21[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_440000.d3[0m


Epoch 45/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:30.42[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=45 step=450000[0m [36mepoch[0m=[35m45[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028976734161376954, 'time_algorithm_update': 0.005002017045021057, 'loss': 0.005247803047089838, 'time_step': 0.007989870023727417}[0m [36mstep[0m=[35m450000[0m
[2m2024-09-25 21:30.42[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_450000.d3[0m


Epoch 46/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:32.03[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=46 step=460000[0m [36mepoch[0m=[35m46[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002906216883659363, 'time_algorithm_update': 0.005005884027481079, 'loss': 0.005235027626878582, 'time_step': 0.008002981114387513}[0m [36mstep[0m=[35m460000[0m
[2m2024-09-25 21:32.03[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_460000.d3[0m


Epoch 47/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:33.23[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=47 step=470000[0m [36mepoch[0m=[35m47[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002898421406745911, 'time_algorithm_update': 0.004994822239875793, 'loss': 0.005249882042524405, 'time_step': 0.007983473873138427}[0m [36mstep[0m=[35m470000[0m
[2m2024-09-25 21:33.23[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_470000.d3[0m


Epoch 48/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:34.44[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=48 step=480000[0m [36mepoch[0m=[35m48[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028948906183242796, 'time_algorithm_update': 0.0049961089849472045, 'loss': 0.00522645049393177, 'time_step': 0.007981226229667664}[0m [36mstep[0m=[35m480000[0m
[2m2024-09-25 21:34.44[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_480000.d3[0m


Epoch 49/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:36.04[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=49 step=490000[0m [36mepoch[0m=[35m49[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028960471630096437, 'time_algorithm_update': 0.005007071328163147, 'loss': 0.0052135687610134485, 'time_step': 0.007993524765968324}[0m [36mstep[0m=[35m490000[0m
[2m2024-09-25 21:36.04[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_490000.d3[0m


Epoch 50/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:37.25[0m [[32m[1minfo     [0m] [1mBC_20240925203021: epoch=50 step=500000[0m [36mepoch[0m=[35m50[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002898890233039856, 'time_algorithm_update': 0.005013119626045227, 'loss': 0.005187341761682182, 'time_step': 0.00800225706100464}[0m [36mstep[0m=[35m500000[0m
[2m2024-09-25 21:37.25[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925203021/model_500000.d3[0m


[(1,
  {'time_sample_batch': 0.0028868495225906374,
   'time_algorithm_update': 0.0051183199644088745,
   'loss': 0.015720033557107674,
   'time_step': 0.008097256326675415}),
 (2,
  {'time_sample_batch': 0.0028759174346923828,
   'time_algorithm_update': 0.004972385549545288,
   'loss': 0.009199370395438746,
   'time_step': 0.007936518120765686}),
 (3,
  {'time_sample_batch': 0.0028751471042633057,
   'time_algorithm_update': 0.004967793107032776,
   'loss': 0.008171565019059926,
   'time_step': 0.007931830477714539}),
 (4,
  {'time_sample_batch': 0.002876189041137695,
   'time_algorithm_update': 0.004966652631759644,
   'loss': 0.007651704471884295,
   'time_step': 0.007932533311843872}),
 (5,
  {'time_sample_batch': 0.002893471908569336,
   'time_algorithm_update': 0.004987011528015136,
   'loss': 0.007258199187950231,
   'time_step': 0.00796983814239502}),
 (6,
  {'time_sample_batch': 0.0028774746417999267,
   'time_algorithm_update': 0.004984899401664734,
   'loss': 0.007014143728

In [None]:
# Recursively archive the whole d3rlpy_logs directory (per-epoch model
# checkpoints and metric CSV files) into model_half_dataset.zip.
!zip -r model_half_dataset.zip d3rlpy_logs

  adding: d3rlpy_logs/ (stored 0%)
  adding: d3rlpy_logs/BC_20240925203021/ (stored 0%)
  adding: d3rlpy_logs/BC_20240925203021/model_90000.d3 (deflated 35%)
  adding: d3rlpy_logs/BC_20240925203021/time_algorithm_update.csv (deflated 61%)
  adding: d3rlpy_logs/BC_20240925203021/time_step.csv (deflated 61%)
  adding: d3rlpy_logs/BC_20240925203021/model_30000.d3 (deflated 22%)
  adding: d3rlpy_logs/BC_20240925203021/model_320000.d3 (deflated 39%)
  adding: d3rlpy_logs/BC_20240925203021/model_440000.d3 (deflated 40%)
  adding: d3rlpy_logs/BC_20240925203021/model_340000.d3 (deflated 39%)
  adding: d3rlpy_logs/BC_20240925203021/model_250000.d3 (deflated 40%)
  adding: d3rlpy_logs/BC_20240925203021/model_160000.d3 (deflated 40%)
  adding: d3rlpy_logs/BC_20240925203021/model_290000.d3 (deflated 40%)
  adding: d3rlpy_logs/BC_20240925203021/model_130000.d3 (deflated 39%)
  adding: d3rlpy_logs/BC_20240925203021/model_500000.d3 (deflated 39%)
  adding: d3rlpy_logs/BC_20240925203021/model_140000.d