 # Install/Import Packages and Download/Unzip Dataset

 ---

 The following code installs pinned versions of d3rlpy and gymnasium, imports all necessary dependencies, and downloads and unzips the dataset from Zenodo.

In [None]:
!pip install d3rlpy==2.6.1 gymnasium==0.29.1

Collecting d3rlpy
  Downloading d3rlpy-2.6.1-py3-none-any.whl.metadata (11 kB)
Collecting gym>=0.26.0 (from d3rlpy)
  Downloading gym-0.26.2.tar.gz (721 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m721.7/721.7 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting structlog (from d3rlpy)
  Downloading structlog-24.4.0-py3-none-any.whl.metadata (7.3 kB)
Collecting colorama (from d3rlpy)
  Downloading colorama-0.4.6-py2.py3-none-any.whl.metadata (17 kB)
Collecting dataclasses-json (from d3rlpy)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting gymnasium (from d3rlpy)
  Downloading gymnasium-0.29.1-py3-none-any.whl.metadata (10 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json->d3rlpy)
  Downloading marshmallow-3.22.0-py3-none-any.whl.metadata 

In [None]:
from google.colab import files
import numpy as np
import d3rlpy
import os

In [None]:
!wget -O full_dataset.zip "https://zenodo.org/record/13830810/files/full_dataset.zip?download=1"

--2024-09-25 22:09:05--  https://zenodo.org/record/13830810/files/full_dataset.zip?download=1
Resolving zenodo.org (zenodo.org)... 188.184.98.238, 188.184.103.159, 188.185.79.172, ...
Connecting to zenodo.org (zenodo.org)|188.184.98.238|:443... connected.
HTTP request sent, awaiting response... 301 MOVED PERMANENTLY
Location: /records/13830810/files/full_dataset.zip [following]
--2024-09-25 22:09:06--  https://zenodo.org/records/13830810/files/full_dataset.zip
Reusing existing connection to zenodo.org:443.
HTTP request sent, awaiting response... 200 OK
Length: 2161582746 (2.0G) [application/octet-stream]
Saving to: ‘full_dataset.zip’


2024-09-25 22:10:24 (26.5 MB/s) - ‘full_dataset.zip’ saved [2161582746/2161582746]



In [None]:
!unzip full_dataset.zip -d /content/
data_dir = '/content/content/data/data'

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: /content/content/data/data/ep_38406.npz  
  inflating: /content/content/data/data/ep_45886.npz  
  inflating: /content/content/data/data/ep_80720.npz  
  inflating: /content/content/data/data/ep_41811.npz  
  inflating: /content/content/data/data/ep_74361.npz  
  inflating: /content/content/data/data/ep_36142.npz  
  inflating: /content/content/data/data/ep_92647.npz  
  inflating: /content/content/data/data/ep_35769.npz  
  inflating: /content/content/data/data/ep_70452.npz  
  inflating: /content/content/data/data/ep_89001.npz  
  inflating: /content/content/data/data/ep_92324.npz  
  inflating: /content/content/data/data/ep_21612.npz  
  inflating: /content/content/data/data/ep_51788.npz  
  inflating: /content/content/data/data/ep_51083.npz  
  inflating: /content/content/data/data/ep_54442.npz  
  inflating: /content/content/data/data/ep_97628.npz  
  inflating: /content/content/data/data/ep_26720.npz  


# Prepare Dataset for d3rlpy

---

The following code loads episodes `ep_1.npz` through `ep_1000.npz` into preallocated arrays and creates a d3rlpy dataset from them.

In [None]:
# Capacity (in transitions) of the preallocated buffers; the arrays are trimmed to
# the number of transitions actually loaded at the end of this cell.
total_steps = 5000000

# Episodes ep_1.npz ... ep_1000.npz are loaded.
episode_ids = range(1, 1001)

# Buffers are laid out as (step, 3, 64, 64) for observations and (step, 3) for actions.
all_observations = np.empty((total_steps, 3, 64, 64), dtype=np.uint8)
all_actions = np.empty((total_steps, 3), dtype=np.float32)
all_rewards = np.empty((total_steps,), dtype=np.float32)
all_terminals = np.empty((total_steps,), dtype=bool)

current_index = 0

for i in episode_ids:
    episode_file = os.path.join(data_dir, f'ep_{i}.npz')

    # np.load on an .npz returns an NpzFile that keeps the file open; the context
    # manager closes it once the arrays have been read. Each array is read exactly
    # once, since every key access re-reads it from the archive.
    with np.load(episode_file) as episode_data:
        ep_terminals = episode_data['terminals']
        ep_observations = episode_data['observations']
        ep_actions = episode_data['actions']
        ep_rewards = episode_data['rewards']

    num_steps = ep_terminals.shape[0]
    end_index = current_index + num_steps

    # Fail with an explicit message instead of an opaque broadcasting error when
    # the loaded episodes do not fit in the preallocated buffers.
    if end_index > total_steps:
        raise ValueError(
            f'{episode_file} would bring the dataset to {end_index} steps, '
            f'exceeding the preallocated capacity total_steps={total_steps}'
        )

    # Move the last axis of the stored observations to position 1 so they match the
    # (step, 3, 64, 64) buffer (assumes episodes are stored channels-last -- confirm).
    all_observations[current_index:end_index] = np.transpose(ep_observations, (0, 3, 1, 2))
    all_actions[current_index:end_index] = ep_actions
    all_rewards[current_index:end_index] = ep_rewards
    all_terminals[current_index:end_index] = ep_terminals

    current_index = end_index

# Drop the unused tail of each buffer.
all_observations = all_observations[:current_index]
all_actions = all_actions[:current_index]
all_rewards = all_rewards[:current_index]
all_terminals = all_terminals[:current_index]

In [None]:
# Wrap the stacked transition arrays in a d3rlpy MDPDataset. Only `terminals` is
# supplied to mark episode ends (no timeouts are passed). d3rlpy logs the action
# space, action size and array signatures it determines automatically.
dataset = d3rlpy.dataset.MDPDataset(
    observations=all_observations,
    actions=all_actions,
    rewards=all_rewards,
    terminals=all_terminals,
)

[2m2024-09-25 22:15.44[0m [[32m[1minfo     [0m] [1mSignatures have been automatically determined.[0m [36maction_signature[0m=[35mSignature(dtype=[dtype('float32')], shape=[(3,)])[0m [36mobservation_signature[0m=[35mSignature(dtype=[dtype('uint8')], shape=[(3, 64, 64)])[0m [36mreward_signature[0m=[35mSignature(dtype=[dtype('float32')], shape=[(1,)])[0m
[2m2024-09-25 22:15.44[0m [[32m[1minfo     [0m] [1mAction-space has been automatically determined.[0m [36maction_space[0m=[35m<ActionSpace.CONTINUOUS: 1>[0m
[2m2024-09-25 22:15.44[0m [[32m[1minfo     [0m] [1mAction size has been automatically determined.[0m [36maction_size[0m=[35m3[0m


# Model Training

---

The following code trains a behavior cloning (BC) policy for 50 epochs of 10,000 steps each (500,000 gradient steps in total).

In [None]:
# Training schedule; the total step count passed to fit() is derived from these so
# the two numbers cannot drift apart.
n_epochs = 50
n_steps_per_epoch = 10000

# Behavior cloning on image observations, preprocessed by d3rlpy's
# PixelObservationScaler. device='cuda' requires a GPU runtime.
BC = d3rlpy.algos.BCConfig(
    observation_scaler=d3rlpy.preprocessing.PixelObservationScaler(),
).create(device='cuda')

# fit() returns a list of (epoch, metrics) tuples. Binding it to a name keeps the
# history available for later inspection and stops the notebook from echoing the
# whole list as cell output; per-epoch metrics are still logged during training.
bc_training_history = BC.fit(
    dataset=dataset,
    n_steps=n_epochs * n_steps_per_epoch,
    n_steps_per_epoch=n_steps_per_epoch,
)

[2m2024-09-25 22:15.44[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('uint8')], shape=[(3, 64, 64)]), action_signature=Signature(dtype=[dtype('float32')], shape=[(3,)]), reward_signature=Signature(dtype=[dtype('float32')], shape=[(1,)]), action_space=<ActionSpace.CONTINUOUS: 1>, action_size=3)[0m
[2m2024-09-25 22:15.44[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/BC_20240925221544[0m
[2m2024-09-25 22:15.44[0m [[32m[1mdebug    [0m] [1mBuilding models...            [0m
[2m2024-09-25 22:15.45[0m [[32m[1mdebug    [0m] [1mModels have been built.       [0m
[2m2024-09-25 22:15.45[0m [[32m[1minfo     [0m] [1mParameters                    [0m [36mparams[0m=[35m{'observation_shape': [3, 64, 64], 'action_size': 3, 'config': {'type': 'bc', 'params': {'batch_size': 100, 'gamma': 0.99, 'observation_scaler': {'type': 'pixel', 'params': {}}, 'act

Epoch 1/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:17.09[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=1 step=10000[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0030334354162216185, 'time_algorithm_update': 0.005135714602470398, 'loss': 0.006612926368432818, 'time_step': 0.008275382828712463}[0m [36mstep[0m=[35m10000[0m
[2m2024-09-25 22:17.09[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_10000.d3[0m


Epoch 2/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:18.32[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=2 step=20000[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0030843403816223146, 'time_algorithm_update': 0.005055114841461181, 'loss': 0.0008610126124694944, 'time_step': 0.00823603835105896}[0m [36mstep[0m=[35m20000[0m
[2m2024-09-25 22:18.32[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_20000.d3[0m


Epoch 3/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:19.54[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=3 step=30000[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0030408738136291505, 'time_algorithm_update': 0.005010224819183349, 'loss': 0.00048361232985334935, 'time_step': 0.00814808669090271}[0m [36mstep[0m=[35m30000[0m
[2m2024-09-25 22:19.54[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_30000.d3[0m


Epoch 4/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:21.16[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=4 step=40000[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0030202298879623414, 'time_algorithm_update': 0.0049900271892547605, 'loss': 0.00033574361195642266, 'time_step': 0.008103820538520812}[0m [36mstep[0m=[35m40000[0m
[2m2024-09-25 22:21.16[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_40000.d3[0m


Epoch 5/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:22.37[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=5 step=50000[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0029990467071533203, 'time_algorithm_update': 0.004988898253440857, 'loss': 0.0002664233852759935, 'time_step': 0.00808110785484314}[0m [36mstep[0m=[35m50000[0m
[2m2024-09-25 22:22.37[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_50000.d3[0m


Epoch 6/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:23.59[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=6 step=60000[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0029978957176208494, 'time_algorithm_update': 0.004985409140586853, 'loss': 0.00021868137477285926, 'time_step': 0.008075953316688537}[0m [36mstep[0m=[35m60000[0m
[2m2024-09-25 22:23.59[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_60000.d3[0m


Epoch 7/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:25.20[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=7 step=70000[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0029979000091552733, 'time_algorithm_update': 0.004998230457305909, 'loss': 0.00019072960339544808, 'time_step': 0.008089294815063477}[0m [36mstep[0m=[35m70000[0m
[2m2024-09-25 22:25.20[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_70000.d3[0m


Epoch 8/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:26.42[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=8 step=80000[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002990638017654419, 'time_algorithm_update': 0.004988453006744385, 'loss': 0.00017225735680331128, 'time_step': 0.008071616101264953}[0m [36mstep[0m=[35m80000[0m
[2m2024-09-25 22:26.42[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_80000.d3[0m


Epoch 9/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:28.03[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=9 step=90000[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003003113865852356, 'time_algorithm_update': 0.004990204191207886, 'loss': 0.000150412756150763, 'time_step': 0.008085942006111145}[0m [36mstep[0m=[35m90000[0m
[2m2024-09-25 22:28.03[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_90000.d3[0m


Epoch 10/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:29.25[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=10 step=100000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0030288986921310425, 'time_algorithm_update': 0.00501917815208435, 'loss': 0.00014187683194541022, 'time_step': 0.008141453075408936}[0m [36mstep[0m=[35m100000[0m
[2m2024-09-25 22:29.25[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_100000.d3[0m


Epoch 11/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:30.47[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=11 step=110000[0m [36mepoch[0m=[35m11[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0030026840686798095, 'time_algorithm_update': 0.0049921221256256105, 'loss': 0.00013200883480749326, 'time_step': 0.008087420606613159}[0m [36mstep[0m=[35m110000[0m
[2m2024-09-25 22:30.47[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_110000.d3[0m


Epoch 12/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:32.08[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=12 step=120000[0m [36mepoch[0m=[35m12[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0030005517959594726, 'time_algorithm_update': 0.004986660432815552, 'loss': 0.00012127360659796977, 'time_step': 0.008079590845108032}[0m [36mstep[0m=[35m120000[0m
[2m2024-09-25 22:32.08[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_120000.d3[0m


Epoch 13/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:33.30[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=13 step=130000[0m [36mepoch[0m=[35m13[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0030094187974929808, 'time_algorithm_update': 0.004987542200088501, 'loss': 0.00012230195198571891, 'time_step': 0.008091070461273194}[0m [36mstep[0m=[35m130000[0m
[2m2024-09-25 22:33.30[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_130000.d3[0m


Epoch 14/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:34.52[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=14 step=140000[0m [36mepoch[0m=[35m14[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0030171133518218994, 'time_algorithm_update': 0.005015500664710999, 'loss': 0.00011520006854298117, 'time_step': 0.00812786099910736}[0m [36mstep[0m=[35m140000[0m
[2m2024-09-25 22:34.52[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_140000.d3[0m


Epoch 15/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:36.13[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=15 step=150000[0m [36mepoch[0m=[35m15[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003012176465988159, 'time_algorithm_update': 0.005000535917282105, 'loss': 0.00010881786865538743, 'time_step': 0.008109005761146546}[0m [36mstep[0m=[35m150000[0m
[2m2024-09-25 22:36.13[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_150000.d3[0m


Epoch 16/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:37.35[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=16 step=160000[0m [36mepoch[0m=[35m16[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0030022518396377565, 'time_algorithm_update': 0.004992285704612732, 'loss': 0.00010657548163126193, 'time_step': 0.008088860654830932}[0m [36mstep[0m=[35m160000[0m
[2m2024-09-25 22:37.35[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_160000.d3[0m


Epoch 17/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:38.57[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=17 step=170000[0m [36mepoch[0m=[35m17[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00302805597782135, 'time_algorithm_update': 0.0050114104270935055, 'loss': 9.730615418666275e-05, 'time_step': 0.008139464569091797}[0m [36mstep[0m=[35m170000[0m
[2m2024-09-25 22:38.57[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_170000.d3[0m


Epoch 18/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:40.19[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=18 step=180000[0m [36mepoch[0m=[35m18[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003011163067817688, 'time_algorithm_update': 0.004992794489860535, 'loss': 0.0001051320228007171, 'time_step': 0.008101546025276183}[0m [36mstep[0m=[35m180000[0m
[2m2024-09-25 22:40.19[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_180000.d3[0m


Epoch 19/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:41.41[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=19 step=190000[0m [36mepoch[0m=[35m19[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0030258197069168093, 'time_algorithm_update': 0.005004821467399597, 'loss': 9.253250291640142e-05, 'time_step': 0.00813566858768463}[0m [36mstep[0m=[35m190000[0m
[2m2024-09-25 22:41.41[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_190000.d3[0m


Epoch 20/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:43.03[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=20 step=200000[0m [36mepoch[0m=[35m20[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0030217709064483643, 'time_algorithm_update': 0.005007591152191162, 'loss': 8.957724262272677e-05, 'time_step': 0.008132351326942443}[0m [36mstep[0m=[35m200000[0m
[2m2024-09-25 22:43.03[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_200000.d3[0m


Epoch 21/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:44.25[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=21 step=210000[0m [36mepoch[0m=[35m21[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003029130220413208, 'time_algorithm_update': 0.005010725164413452, 'loss': 9.519343411502632e-05, 'time_step': 0.008143524050712586}[0m [36mstep[0m=[35m210000[0m
[2m2024-09-25 22:44.25[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_210000.d3[0m


Epoch 22/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:45.47[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=22 step=220000[0m [36mepoch[0m=[35m22[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00303759024143219, 'time_algorithm_update': 0.005008299541473389, 'loss': 8.357113890397158e-05, 'time_step': 0.008153121733665466}[0m [36mstep[0m=[35m220000[0m
[2m2024-09-25 22:45.47[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_220000.d3[0m


Epoch 23/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:47.09[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=23 step=230000[0m [36mepoch[0m=[35m23[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0030373414039611816, 'time_algorithm_update': 0.005001111316680908, 'loss': 8.904212876896053e-05, 'time_step': 0.008142271947860718}[0m [36mstep[0m=[35m230000[0m
[2m2024-09-25 22:47.09[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_230000.d3[0m


Epoch 24/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:48.31[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=24 step=240000[0m [36mepoch[0m=[35m24[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0030409476518630982, 'time_algorithm_update': 0.005013581538200379, 'loss': 7.729972895849642e-05, 'time_step': 0.008159242534637451}[0m [36mstep[0m=[35m240000[0m
[2m2024-09-25 22:48.31[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_240000.d3[0m


Epoch 25/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:49.54[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=25 step=250000[0m [36mepoch[0m=[35m25[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003045671916007996, 'time_algorithm_update': 0.0050188910961151125, 'loss': 8.588152053343947e-05, 'time_step': 0.008173644471168518}[0m [36mstep[0m=[35m250000[0m
[2m2024-09-25 22:49.54[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_250000.d3[0m


Epoch 26/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:51.16[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=26 step=260000[0m [36mepoch[0m=[35m26[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0030429353952407838, 'time_algorithm_update': 0.005016464018821716, 'loss': 7.779724143692875e-05, 'time_step': 0.00816672215461731}[0m [36mstep[0m=[35m260000[0m
[2m2024-09-25 22:51.16[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_260000.d3[0m


Epoch 27/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:52.38[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=27 step=270000[0m [36mepoch[0m=[35m27[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0030411281585693358, 'time_algorithm_update': 0.005008781862258911, 'loss': 8.537675049319659e-05, 'time_step': 0.008157679224014282}[0m [36mstep[0m=[35m270000[0m
[2m2024-09-25 22:52.38[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_270000.d3[0m


Epoch 28/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:54.01[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=28 step=280000[0m [36mepoch[0m=[35m28[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0030369365453720093, 'time_algorithm_update': 0.005016816234588623, 'loss': 6.356795135307039e-05, 'time_step': 0.008158707857131959}[0m [36mstep[0m=[35m280000[0m
[2m2024-09-25 22:54.01[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_280000.d3[0m


Epoch 29/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:55.23[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=29 step=290000[0m [36mepoch[0m=[35m29[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0030456984043121336, 'time_algorithm_update': 0.005004222059249878, 'loss': 7.407436333087389e-05, 'time_step': 0.008159600281715394}[0m [36mstep[0m=[35m290000[0m
[2m2024-09-25 22:55.23[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_290000.d3[0m


Epoch 30/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:56.45[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=30 step=300000[0m [36mepoch[0m=[35m30[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0030307633876800535, 'time_algorithm_update': 0.004993483114242554, 'loss': 7.162278622454323e-05, 'time_step': 0.008131723093986512}[0m [36mstep[0m=[35m300000[0m
[2m2024-09-25 22:56.45[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_300000.d3[0m


Epoch 31/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:58.07[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=31 step=310000[0m [36mepoch[0m=[35m31[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003051202607154846, 'time_algorithm_update': 0.005001025247573852, 'loss': 6.864968091631454e-05, 'time_step': 0.008162138628959656}[0m [36mstep[0m=[35m310000[0m
[2m2024-09-25 22:58.07[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_310000.d3[0m


Epoch 32/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:59.29[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=32 step=320000[0m [36mepoch[0m=[35m32[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0030397820234298704, 'time_algorithm_update': 0.005006679630279541, 'loss': 7.315450594160212e-05, 'time_step': 0.008156840920448303}[0m [36mstep[0m=[35m320000[0m
[2m2024-09-25 22:59.29[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_320000.d3[0m


Epoch 33/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 23:00.52[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=33 step=330000[0m [36mepoch[0m=[35m33[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0030431879043579103, 'time_algorithm_update': 0.005019949412345886, 'loss': 7.080302601752919e-05, 'time_step': 0.008171632099151612}[0m [36mstep[0m=[35m330000[0m
[2m2024-09-25 23:00.52[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_330000.d3[0m


Epoch 34/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 23:02.14[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=34 step=340000[0m [36mepoch[0m=[35m34[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0030404593944549562, 'time_algorithm_update': 0.005017952871322632, 'loss': 6.34250885151232e-05, 'time_step': 0.008165121793746948}[0m [36mstep[0m=[35m340000[0m
[2m2024-09-25 23:02.14[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_340000.d3[0m


Epoch 35/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 23:03.36[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=35 step=350000[0m [36mepoch[0m=[35m35[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0030472761392593382, 'time_algorithm_update': 0.00501598608493805, 'loss': 6.733849963584361e-05, 'time_step': 0.008172260928153992}[0m [36mstep[0m=[35m350000[0m
[2m2024-09-25 23:03.36[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_350000.d3[0m


Epoch 36/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 23:04.59[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=36 step=360000[0m [36mepoch[0m=[35m36[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003053878664970398, 'time_algorithm_update': 0.005018168210983277, 'loss': 6.422802944334762e-05, 'time_step': 0.008182548522949218}[0m [36mstep[0m=[35m360000[0m
[2m2024-09-25 23:04.59[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_360000.d3[0m


Epoch 37/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 23:06.22[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=37 step=370000[0m [36mepoch[0m=[35m37[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003052018642425537, 'time_algorithm_update': 0.005032876253128052, 'loss': 6.320140318966878e-05, 'time_step': 0.008195145058631897}[0m [36mstep[0m=[35m370000[0m
[2m2024-09-25 23:06.22[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_370000.d3[0m


Epoch 38/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 23:07.44[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=38 step=380000[0m [36mepoch[0m=[35m38[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0030477614879608154, 'time_algorithm_update': 0.005014340758323669, 'loss': 6.83783310148101e-05, 'time_step': 0.008171777248382568}[0m [36mstep[0m=[35m380000[0m
[2m2024-09-25 23:07.44[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_380000.d3[0m


Epoch 39/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 23:09.06[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=39 step=390000[0m [36mepoch[0m=[35m39[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0030470629215240477, 'time_algorithm_update': 0.005027965593338013, 'loss': 5.6942072082620144e-05, 'time_step': 0.008179939103126525}[0m [36mstep[0m=[35m390000[0m
[2m2024-09-25 23:09.06[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_390000.d3[0m


Epoch 40/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 23:10.29[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=40 step=400000[0m [36mepoch[0m=[35m40[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0030474158763885498, 'time_algorithm_update': 0.005013126277923584, 'loss': 6.591113100548683e-05, 'time_step': 0.00816860854625702}[0m [36mstep[0m=[35m400000[0m
[2m2024-09-25 23:10.29[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_400000.d3[0m


Epoch 41/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 23:11.51[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=41 step=410000[0m [36mepoch[0m=[35m41[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0030326369524002073, 'time_algorithm_update': 0.005009442543983459, 'loss': 5.932066695049798e-05, 'time_step': 0.008148064875602722}[0m [36mstep[0m=[35m410000[0m
[2m2024-09-25 23:11.51[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_410000.d3[0m


Epoch 42/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 23:13.13[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=42 step=420000[0m [36mepoch[0m=[35m42[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003039023470878601, 'time_algorithm_update': 0.005022805523872375, 'loss': 5.585920204375725e-05, 'time_step': 0.008167717862129211}[0m [36mstep[0m=[35m420000[0m
[2m2024-09-25 23:13.13[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_420000.d3[0m


Epoch 43/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 23:14.35[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=43 step=430000[0m [36mepoch[0m=[35m43[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0030308390617370604, 'time_algorithm_update': 0.005013163590431214, 'loss': 5.722930376387012e-05, 'time_step': 0.008151806974411011}[0m [36mstep[0m=[35m430000[0m
[2m2024-09-25 23:14.35[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_430000.d3[0m


Epoch 44/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 23:15.58[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=44 step=440000[0m [36mepoch[0m=[35m44[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003056881260871887, 'time_algorithm_update': 0.005038969516754151, 'loss': 5.989020759543564e-05, 'time_step': 0.008206556344032287}[0m [36mstep[0m=[35m440000[0m
[2m2024-09-25 23:15.58[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_440000.d3[0m


Epoch 45/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 23:17.21[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=45 step=450000[0m [36mepoch[0m=[35m45[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003040020823478699, 'time_algorithm_update': 0.005020646095275879, 'loss': 5.532124825331266e-05, 'time_step': 0.00817019214630127}[0m [36mstep[0m=[35m450000[0m
[2m2024-09-25 23:17.21[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_450000.d3[0m


Epoch 46/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 23:18.43[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=46 step=460000[0m [36mepoch[0m=[35m46[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0030588317394256593, 'time_algorithm_update': 0.005031288075447083, 'loss': 5.6071326932669765e-05, 'time_step': 0.008201047253608703}[0m [36mstep[0m=[35m460000[0m
[2m2024-09-25 23:18.43[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_460000.d3[0m


Epoch 47/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 23:20.06[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=47 step=470000[0m [36mepoch[0m=[35m47[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0030457961559295656, 'time_algorithm_update': 0.00502301778793335, 'loss': 5.950586878825561e-05, 'time_step': 0.008179137134552003}[0m [36mstep[0m=[35m470000[0m
[2m2024-09-25 23:20.06[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_470000.d3[0m


Epoch 48/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 23:21.28[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=48 step=480000[0m [36mepoch[0m=[35m48[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.003041471815109253, 'time_algorithm_update': 0.005025223445892334, 'loss': 4.983029556251495e-05, 'time_step': 0.008172040557861329}[0m [36mstep[0m=[35m480000[0m
[2m2024-09-25 23:21.28[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_480000.d3[0m


Epoch 49/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 23:22.51[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=49 step=490000[0m [36mepoch[0m=[35m49[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0030454928636550905, 'time_algorithm_update': 0.005028397107124329, 'loss': 5.199615664441808e-05, 'time_step': 0.008181882309913636}[0m [36mstep[0m=[35m490000[0m
[2m2024-09-25 23:22.51[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_490000.d3[0m


Epoch 50/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 23:24.13[0m [[32m[1minfo     [0m] [1mBC_20240925221544: epoch=50 step=500000[0m [36mepoch[0m=[35m50[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0030592368125915525, 'time_algorithm_update': 0.005022936320304871, 'loss': 6.158876042991323e-05, 'time_step': 0.00819354944229126}[0m [36mstep[0m=[35m500000[0m
[2m2024-09-25 23:24.13[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925221544/model_500000.d3[0m


[(1,
  {'time_sample_batch': 0.0030334354162216185,
   'time_algorithm_update': 0.005135714602470398,
   'loss': 0.006612926368432818,
   'time_step': 0.008275382828712463}),
 (2,
  {'time_sample_batch': 0.0030843403816223146,
   'time_algorithm_update': 0.005055114841461181,
   'loss': 0.0008610126124694944,
   'time_step': 0.00823603835105896}),
 (3,
  {'time_sample_batch': 0.0030408738136291505,
   'time_algorithm_update': 0.005010224819183349,
   'loss': 0.00048361232985334935,
   'time_step': 0.00814808669090271}),
 (4,
  {'time_sample_batch': 0.0030202298879623414,
   'time_algorithm_update': 0.0049900271892547605,
   'loss': 0.00033574361195642266,
   'time_step': 0.008103820538520812}),
 (5,
  {'time_sample_batch': 0.0029990467071533203,
   'time_algorithm_update': 0.004988898253440857,
   'loss': 0.0002664233852759935,
   'time_step': 0.00808110785484314}),
 (6,
  {'time_sample_batch': 0.0029978957176208494,
   'time_algorithm_update': 0.004985409140586853,
   'loss': 0.000218

In [None]:
# Recursively archive the d3rlpy_logs directory (the per-epoch model_*.d3
# checkpoints and metric CSV files written during training) into one zip file.
!zip -r model_hundredth_dataset.zip d3rlpy_logs

  adding: d3rlpy_logs/ (stored 0%)
  adding: d3rlpy_logs/BC_20240925221544/ (stored 0%)
  adding: d3rlpy_logs/BC_20240925221544/model_90000.d3 (deflated 39%)
  adding: d3rlpy_logs/BC_20240925221544/time_algorithm_update.csv (deflated 61%)
  adding: d3rlpy_logs/BC_20240925221544/time_step.csv (deflated 60%)
  adding: d3rlpy_logs/BC_20240925221544/model_30000.d3 (deflated 23%)
  adding: d3rlpy_logs/BC_20240925221544/model_320000.d3 (deflated 44%)
  adding: d3rlpy_logs/BC_20240925221544/model_440000.d3 (deflated 45%)
  adding: d3rlpy_logs/BC_20240925221544/model_340000.d3 (deflated 44%)
  adding: d3rlpy_logs/BC_20240925221544/model_250000.d3 (deflated 44%)
  adding: d3rlpy_logs/BC_20240925221544/model_160000.d3 (deflated 42%)
  adding: d3rlpy_logs/BC_20240925221544/model_290000.d3 (deflated 44%)
  adding: d3rlpy_logs/BC_20240925221544/model_130000.d3 (deflated 41%)
  adding: d3rlpy_logs/BC_20240925221544/model_500000.d3 (deflated 45%)
  adding: d3rlpy_logs/BC_20240925221544/model_140000.d