 # Install/Import Packages and Download/Unzip Dataset

 ---

 The following code installs pinned versions of d3rlpy and gymnasium, imports the required packages, and downloads and unzips the dataset from Zenodo.

In [None]:
!pip install d3rlpy==2.6.1 gymnasium==0.29.1

Collecting d3rlpy
  Downloading d3rlpy-2.6.1-py3-none-any.whl.metadata (11 kB)
Collecting gym>=0.26.0 (from d3rlpy)
  Downloading gym-0.26.2.tar.gz (721 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m721.7/721.7 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting structlog (from d3rlpy)
  Downloading structlog-24.4.0-py3-none-any.whl.metadata (7.3 kB)
Collecting colorama (from d3rlpy)
  Downloading colorama-0.4.6-py2.py3-none-any.whl.metadata (17 kB)
Collecting dataclasses-json (from d3rlpy)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting gymnasium (from d3rlpy)
  Downloading gymnasium-0.29.1-py3-none-any.whl.metadata (10 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json->d3rlpy)
  Downloading marshmallow-3.22.0-py3-none-any.whl.metadata 

In [None]:
from google.colab import files
import numpy as np
import d3rlpy
import os

In [None]:
!wget -O full_dataset.zip "https://zenodo.org/record/13830810/files/full_dataset.zip?download=1"

--2024-09-25 20:52:48--  https://zenodo.org/record/13830810/files/full_dataset.zip?download=1
Resolving zenodo.org (zenodo.org)... 188.184.103.159, 188.185.79.172, 188.184.98.238, ...
Connecting to zenodo.org (zenodo.org)|188.184.103.159|:443... connected.
HTTP request sent, awaiting response... 301 MOVED PERMANENTLY
Location: /records/13830810/files/full_dataset.zip [following]
--2024-09-25 20:52:49--  https://zenodo.org/records/13830810/files/full_dataset.zip
Reusing existing connection to zenodo.org:443.
HTTP request sent, awaiting response... 200 OK
Length: 2161582746 (2.0G) [application/octet-stream]
Saving to: ‘full_dataset.zip’


2024-09-25 20:54:16 (23.6 MB/s) - ‘full_dataset.zip’ saved [2161582746/2161582746]



In [None]:
!unzip full_dataset.zip -d /content/
data_dir = '/content/content/data/data'

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: /content/content/data/data/ep_38406.npz  
  inflating: /content/content/data/data/ep_45886.npz  
  inflating: /content/content/data/data/ep_80720.npz  
  inflating: /content/content/data/data/ep_41811.npz  
  inflating: /content/content/data/data/ep_74361.npz  
  inflating: /content/content/data/data/ep_36142.npz  
  inflating: /content/content/data/data/ep_92647.npz  
  inflating: /content/content/data/data/ep_35769.npz  
  inflating: /content/content/data/data/ep_70452.npz  
  inflating: /content/content/data/data/ep_89001.npz  
  inflating: /content/content/data/data/ep_92324.npz  
  inflating: /content/content/data/data/ep_21612.npz  
  inflating: /content/content/data/data/ep_51788.npz  
  inflating: /content/content/data/data/ep_51083.npz  
  inflating: /content/content/data/data/ep_54442.npz  
  inflating: /content/content/data/data/ep_97628.npz  
  inflating: /content/content/data/data/ep_26720.npz  


# Prepare Dataset for d3rlpy

---

 The following code loads episodes 1 through 25,000 (a subset of the downloaded episode files) into preallocated arrays and creates a d3rlpy dataset from them.

In [None]:
# Capacity of the preallocated buffers, in transitions. The buffers are
# trimmed to the number of transitions actually loaded once the loop finishes.
total_steps = 5000000

# Inclusive range of episode files (ep_<id>.npz) to load from data_dir.
first_episode = 1
last_episode = 25000

all_observations = np.empty((total_steps, 3, 64, 64), dtype=np.uint8)
all_actions = np.empty((total_steps, 3), dtype=np.float32)
all_rewards = np.empty((total_steps,), dtype=np.float32)
all_terminals = np.empty((total_steps,), dtype=bool)

# Write cursor: index of the next free row in every buffer.
current_index = 0

for i in range(first_episode, last_episode + 1):
    episode_file = os.path.join(data_dir, f'ep_{i}.npz')

    # np.load on an .npz returns a lazily-read NpzFile backed by an open file
    # handle; the context manager closes it as soon as the episode is copied.
    with np.load(episode_file) as episode_data:
        num_steps = episode_data['terminals'].shape[0]
        end_index = current_index + num_steps

        # Fail with an explicit message instead of an opaque broadcasting
        # error when the episodes do not fit in the preallocated buffers.
        if end_index > total_steps:
            raise ValueError(
                f'{episode_file} needs rows up to {end_index}, but the buffers '
                f'only hold total_steps={total_steps} transitions'
            )

        # Move axis 3 to position 1 to match the (steps, 3, 64, 64) buffer;
        # presumably the files store frames channel-last - TODO confirm.
        all_observations[current_index:end_index] = np.transpose(episode_data['observations'], (0, 3, 1, 2))
        all_actions[current_index:end_index] = episode_data['actions']
        all_rewards[current_index:end_index] = episode_data['rewards']
        all_terminals[current_index:end_index] = episode_data['terminals']

    current_index = end_index

# Drop the unused tail of each buffer (slicing returns views, not copies).
all_observations = all_observations[:current_index]
all_actions = all_actions[:current_index]
all_rewards = all_rewards[:current_index]
all_terminals = all_terminals[:current_index]

In [None]:
# Collect the trimmed arrays under the keyword names MDPDataset expects, then
# build the dataset in a single call. d3rlpy infers the observation/action
# signatures and the action space from the arrays themselves.
mdp_arrays = {
    'observations': all_observations,
    'actions': all_actions,
    'rewards': all_rewards,
    'terminals': all_terminals,
}
dataset = d3rlpy.dataset.MDPDataset(**mdp_arrays)

[2m2024-09-25 21:01.10[0m [[32m[1minfo     [0m] [1mSignatures have been automatically determined.[0m [36maction_signature[0m=[35mSignature(dtype=[dtype('float32')], shape=[(3,)])[0m [36mobservation_signature[0m=[35mSignature(dtype=[dtype('uint8')], shape=[(3, 64, 64)])[0m [36mreward_signature[0m=[35mSignature(dtype=[dtype('float32')], shape=[(1,)])[0m
[2m2024-09-25 21:01.10[0m [[32m[1minfo     [0m] [1mAction-space has been automatically determined.[0m [36maction_space[0m=[35m<ActionSpace.CONTINUOUS: 1>[0m
[2m2024-09-25 21:01.10[0m [[32m[1minfo     [0m] [1mAction size has been automatically determined.[0m [36maction_size[0m=[35m3[0m


# Model Training

---

The following code trains a behavior cloning algorithm for 50 epochs at 10000 steps per epoch.

In [None]:
# Training budget: 50 epochs of 10,000 steps each (500,000 steps in total).
n_epochs = 50
n_steps_per_epoch = 10000

# Behavior cloning with d3rlpy's pixel observation scaler applied to the
# uint8 image observations; the model is created on the GPU.
BC = d3rlpy.algos.BCConfig(
    observation_scaler=d3rlpy.preprocessing.PixelObservationScaler(),
).create(device='cuda')

# fit() returns a list of (epoch, metrics) pairs. Binding it to a name keeps
# the notebook from echoing the whole list as cell output and leaves the
# metrics available for later inspection.
training_history = BC.fit(
    dataset=dataset,
    n_steps=n_epochs * n_steps_per_epoch,
    n_steps_per_epoch=n_steps_per_epoch,
)

[2m2024-09-25 21:01.11[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('uint8')], shape=[(3, 64, 64)]), action_signature=Signature(dtype=[dtype('float32')], shape=[(3,)]), reward_signature=Signature(dtype=[dtype('float32')], shape=[(1,)]), action_space=<ActionSpace.CONTINUOUS: 1>, action_size=3)[0m
[2m2024-09-25 21:01.11[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/BC_20240925210111[0m
[2m2024-09-25 21:01.11[0m [[32m[1mdebug    [0m] [1mBuilding models...            [0m
[2m2024-09-25 21:01.13[0m [[32m[1mdebug    [0m] [1mModels have been built.       [0m
[2m2024-09-25 21:01.13[0m [[32m[1minfo     [0m] [1mParameters                    [0m [36mparams[0m=[35m{'observation_shape': [3, 64, 64], 'action_size': 3, 'config': {'type': 'bc', 'params': {'batch_size': 100, 'gamma': 0.99, 'observation_scaler': {'type': 'pixel', 'params': {}}, 'act

Epoch 1/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:02.33[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=1 step=10000[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028660141229629515, 'time_algorithm_update': 0.005022420930862427, 'loss': 0.01563481689346954, 'time_step': 0.007979437351226807}[0m [36mstep[0m=[35m10000[0m
[2m2024-09-25 21:02.33[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_10000.d3[0m


Epoch 2/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:03.52[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=2 step=20000[0m [36mepoch[0m=[35m2[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002807782959938049, 'time_algorithm_update': 0.004874714803695679, 'loss': 0.008621751333330758, 'time_step': 0.007771308946609497}[0m [36mstep[0m=[35m20000[0m
[2m2024-09-25 21:03.52[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_20000.d3[0m


Epoch 3/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:05.10[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=3 step=30000[0m [36mepoch[0m=[35m3[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028023838996887205, 'time_algorithm_update': 0.004854305624961853, 'loss': 0.00738925339858979, 'time_step': 0.007745253396034241}[0m [36mstep[0m=[35m30000[0m
[2m2024-09-25 21:05.10[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_30000.d3[0m


Epoch 4/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:06.28[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=4 step=40000[0m [36mepoch[0m=[35m4[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028173617601394653, 'time_algorithm_update': 0.004886588954925537, 'loss': 0.0066413570678792895, 'time_step': 0.007792285108566284}[0m [36mstep[0m=[35m40000[0m
[2m2024-09-25 21:06.28[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_40000.d3[0m


Epoch 5/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:07.47[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=5 step=50000[0m [36mepoch[0m=[35m5[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028092530012130737, 'time_algorithm_update': 0.004891440606117249, 'loss': 0.006154018088849261, 'time_step': 0.007789110708236694}[0m [36mstep[0m=[35m50000[0m
[2m2024-09-25 21:07.47[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_50000.d3[0m


Epoch 6/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:09.05[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=6 step=60000[0m [36mepoch[0m=[35m6[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028082342863082884, 'time_algorithm_update': 0.004899337792396545, 'loss': 0.005777203653869219, 'time_step': 0.007794552493095398}[0m [36mstep[0m=[35m60000[0m
[2m2024-09-25 21:09.05[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_60000.d3[0m


Epoch 7/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:10.24[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=7 step=70000[0m [36mepoch[0m=[35m7[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002808179473876953, 'time_algorithm_update': 0.004889301967620849, 'loss': 0.005491482684738003, 'time_step': 0.00778476243019104}[0m [36mstep[0m=[35m70000[0m
[2m2024-09-25 21:10.24[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_70000.d3[0m


Epoch 8/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:11.42[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=8 step=80000[0m [36mepoch[0m=[35m8[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028098910331726076, 'time_algorithm_update': 0.004888796496391296, 'loss': 0.005249310752213933, 'time_step': 0.0077851825475692745}[0m [36mstep[0m=[35m80000[0m
[2m2024-09-25 21:11.42[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_80000.d3[0m


Epoch 9/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:13.01[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=9 step=90000[0m [36mepoch[0m=[35m9[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002822067141532898, 'time_algorithm_update': 0.004895310378074646, 'loss': 0.005068249808670953, 'time_step': 0.007805322027206421}[0m [36mstep[0m=[35m90000[0m
[2m2024-09-25 21:13.01[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_90000.d3[0m


Epoch 10/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:14.20[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=10 step=100000[0m [36mepoch[0m=[35m10[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028220513105392457, 'time_algorithm_update': 0.004895762872695923, 'loss': 0.004878629932552576, 'time_step': 0.007804873561859131}[0m [36mstep[0m=[35m100000[0m
[2m2024-09-25 21:14.20[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_100000.d3[0m


Epoch 11/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:15.38[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=11 step=110000[0m [36mepoch[0m=[35m11[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002819279909133911, 'time_algorithm_update': 0.004893472909927368, 'loss': 0.004761661758134142, 'time_step': 0.007800611281394958}[0m [36mstep[0m=[35m110000[0m
[2m2024-09-25 21:15.38[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_110000.d3[0m


Epoch 12/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:16.57[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=12 step=120000[0m [36mepoch[0m=[35m12[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028256412267684936, 'time_algorithm_update': 0.00489627857208252, 'loss': 0.0046230023906799035, 'time_step': 0.007809714770317077}[0m [36mstep[0m=[35m120000[0m
[2m2024-09-25 21:16.57[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_120000.d3[0m


Epoch 13/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:18.16[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=13 step=130000[0m [36mepoch[0m=[35m13[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028157402276992797, 'time_algorithm_update': 0.004885613226890564, 'loss': 0.00453711534217, 'time_step': 0.007788479447364807}[0m [36mstep[0m=[35m130000[0m
[2m2024-09-25 21:18.16[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_130000.d3[0m


Epoch 14/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:19.34[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=14 step=140000[0m [36mepoch[0m=[35m14[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028074408292770386, 'time_algorithm_update': 0.004879387068748474, 'loss': 0.004450778069393709, 'time_step': 0.007773299765586853}[0m [36mstep[0m=[35m140000[0m
[2m2024-09-25 21:19.34[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_140000.d3[0m


Epoch 15/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:20.52[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=15 step=150000[0m [36mepoch[0m=[35m15[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028104110717773437, 'time_algorithm_update': 0.004888222074508667, 'loss': 0.0043662842758465555, 'time_step': 0.007785477566719055}[0m [36mstep[0m=[35m150000[0m
[2m2024-09-25 21:20.52[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_150000.d3[0m


Epoch 16/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:22.11[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=16 step=160000[0m [36mepoch[0m=[35m16[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002813558530807495, 'time_algorithm_update': 0.004889518332481384, 'loss': 0.004305019774660468, 'time_step': 0.007790629076957703}[0m [36mstep[0m=[35m160000[0m
[2m2024-09-25 21:22.11[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_160000.d3[0m


Epoch 17/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:23.29[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=17 step=170000[0m [36mepoch[0m=[35m17[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002815942144393921, 'time_algorithm_update': 0.004885118198394775, 'loss': 0.004229503640998155, 'time_step': 0.00778801143169403}[0m [36mstep[0m=[35m170000[0m
[2m2024-09-25 21:23.30[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_170000.d3[0m


Epoch 18/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:24.48[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=18 step=180000[0m [36mepoch[0m=[35m18[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.00281638286113739, 'time_algorithm_update': 0.004884912848472595, 'loss': 0.004197982952324673, 'time_step': 0.0077887794494628905}[0m [36mstep[0m=[35m180000[0m
[2m2024-09-25 21:24.48[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_180000.d3[0m


Epoch 19/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:26.07[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=19 step=190000[0m [36mepoch[0m=[35m19[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002822888445854187, 'time_algorithm_update': 0.004887163400650024, 'loss': 0.004130697878287174, 'time_step': 0.007797375512123108}[0m [36mstep[0m=[35m190000[0m
[2m2024-09-25 21:26.07[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_190000.d3[0m


Epoch 20/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:27.25[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=20 step=200000[0m [36mepoch[0m=[35m20[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028248640298843383, 'time_algorithm_update': 0.004887300729751587, 'loss': 0.004057757729873992, 'time_step': 0.007800013756752014}[0m [36mstep[0m=[35m200000[0m
[2m2024-09-25 21:27.25[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_200000.d3[0m


Epoch 21/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:28.44[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=21 step=210000[0m [36mepoch[0m=[35m21[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002819014263153076, 'time_algorithm_update': 0.004883189988136291, 'loss': 0.004035625756997615, 'time_step': 0.0077894421339035035}[0m [36mstep[0m=[35m210000[0m
[2m2024-09-25 21:28.44[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_210000.d3[0m


Epoch 22/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:30.02[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=22 step=220000[0m [36mepoch[0m=[35m22[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028242063999176026, 'time_algorithm_update': 0.004889927983283996, 'loss': 0.003991657308209688, 'time_step': 0.00780193932056427}[0m [36mstep[0m=[35m220000[0m
[2m2024-09-25 21:30.02[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_220000.d3[0m


Epoch 23/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:31.21[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=23 step=230000[0m [36mepoch[0m=[35m23[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002817748761177063, 'time_algorithm_update': 0.004884334397315979, 'loss': 0.00393060746320989, 'time_step': 0.0077889131546020505}[0m [36mstep[0m=[35m230000[0m
[2m2024-09-25 21:31.21[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_230000.d3[0m


Epoch 24/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:32.39[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=24 step=240000[0m [36mepoch[0m=[35m24[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028224764108657836, 'time_algorithm_update': 0.004881655836105347, 'loss': 0.003903360047983006, 'time_step': 0.007791441202163696}[0m [36mstep[0m=[35m240000[0m
[2m2024-09-25 21:32.39[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_240000.d3[0m


Epoch 25/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:33.58[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=25 step=250000[0m [36mepoch[0m=[35m25[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002815861749649048, 'time_algorithm_update': 0.004885803246498108, 'loss': 0.0038769412464112977, 'time_step': 0.007788994359970092}[0m [36mstep[0m=[35m250000[0m
[2m2024-09-25 21:33.58[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_250000.d3[0m


Epoch 26/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:35.17[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=26 step=260000[0m [36mepoch[0m=[35m26[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028202480554580687, 'time_algorithm_update': 0.004882472491264343, 'loss': 0.00383969308657106, 'time_step': 0.0077926189661026}[0m [36mstep[0m=[35m260000[0m
[2m2024-09-25 21:35.17[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_260000.d3[0m


Epoch 27/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:36.36[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=27 step=270000[0m [36mepoch[0m=[35m27[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028446945667266847, 'time_algorithm_update': 0.004912577080726624, 'loss': 0.0038122223949292673, 'time_step': 0.007845829892158509}[0m [36mstep[0m=[35m270000[0m
[2m2024-09-25 21:36.36[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_270000.d3[0m


Epoch 28/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:37.54[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=28 step=280000[0m [36mepoch[0m=[35m28[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028289684057235718, 'time_algorithm_update': 0.004891325044631958, 'loss': 0.0037899127794895323, 'time_step': 0.007808379173278809}[0m [36mstep[0m=[35m280000[0m
[2m2024-09-25 21:37.54[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_280000.d3[0m


Epoch 29/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:39.13[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=29 step=290000[0m [36mepoch[0m=[35m29[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028151391983032227, 'time_algorithm_update': 0.004883597946166992, 'loss': 0.0037601703650318088, 'time_step': 0.007786584305763245}[0m [36mstep[0m=[35m290000[0m
[2m2024-09-25 21:39.13[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_290000.d3[0m


Epoch 30/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:40.32[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=30 step=300000[0m [36mepoch[0m=[35m30[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028293673276901243, 'time_algorithm_update': 0.0048988336086273196, 'loss': 0.003724356518359855, 'time_step': 0.007816477108001709}[0m [36mstep[0m=[35m300000[0m
[2m2024-09-25 21:40.32[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_300000.d3[0m


Epoch 31/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:41.50[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=31 step=310000[0m [36mepoch[0m=[35m31[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002823833203315735, 'time_algorithm_update': 0.004898068380355835, 'loss': 0.0036989481940399855, 'time_step': 0.007810158801078796}[0m [36mstep[0m=[35m310000[0m
[2m2024-09-25 21:41.50[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_310000.d3[0m


Epoch 32/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:43.09[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=32 step=320000[0m [36mepoch[0m=[35m32[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002828168177604675, 'time_algorithm_update': 0.004900870060920715, 'loss': 0.003692701050580945, 'time_step': 0.007817015099525452}[0m [36mstep[0m=[35m320000[0m
[2m2024-09-25 21:43.09[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_320000.d3[0m


Epoch 33/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:44.28[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=33 step=330000[0m [36mepoch[0m=[35m33[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028355438232421874, 'time_algorithm_update': 0.00490710883140564, 'loss': 0.003658918912347872, 'time_step': 0.007831411218643189}[0m [36mstep[0m=[35m330000[0m
[2m2024-09-25 21:44.28[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_330000.d3[0m


Epoch 34/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:45.47[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=34 step=340000[0m [36mepoch[0m=[35m34[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028228951930999754, 'time_algorithm_update': 0.0048907901525497436, 'loss': 0.003662261669174768, 'time_step': 0.0078031268119812015}[0m [36mstep[0m=[35m340000[0m
[2m2024-09-25 21:45.47[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_340000.d3[0m


Epoch 35/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:47.06[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=35 step=350000[0m [36mepoch[0m=[35m35[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028367381811141967, 'time_algorithm_update': 0.004903528761863708, 'loss': 0.003636678675573785, 'time_step': 0.007828123092651367}[0m [36mstep[0m=[35m350000[0m
[2m2024-09-25 21:47.06[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_350000.d3[0m


Epoch 36/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:48.24[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=36 step=360000[0m [36mepoch[0m=[35m36[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028227586984634397, 'time_algorithm_update': 0.00489499192237854, 'loss': 0.003597447088430636, 'time_step': 0.007806102061271667}[0m [36mstep[0m=[35m360000[0m
[2m2024-09-25 21:48.24[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_360000.d3[0m


Epoch 37/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:49.43[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=37 step=370000[0m [36mepoch[0m=[35m37[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002816528534889221, 'time_algorithm_update': 0.004888715171813965, 'loss': 0.003580049861909356, 'time_step': 0.007792299246788025}[0m [36mstep[0m=[35m370000[0m
[2m2024-09-25 21:49.43[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_370000.d3[0m


Epoch 38/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:51.02[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=38 step=380000[0m [36mepoch[0m=[35m38[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028245233058929444, 'time_algorithm_update': 0.004887093901634216, 'loss': 0.003574350990494713, 'time_step': 0.007799608612060547}[0m [36mstep[0m=[35m380000[0m
[2m2024-09-25 21:51.02[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_380000.d3[0m


Epoch 39/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:52.20[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=39 step=390000[0m [36mepoch[0m=[35m39[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028334118127822877, 'time_algorithm_update': 0.004895469880104065, 'loss': 0.003548692811327055, 'time_step': 0.007817084741592407}[0m [36mstep[0m=[35m390000[0m
[2m2024-09-25 21:52.20[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_390000.d3[0m


Epoch 40/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:53.39[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=40 step=400000[0m [36mepoch[0m=[35m40[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002840057349205017, 'time_algorithm_update': 0.00489427182674408, 'loss': 0.003533028543787077, 'time_step': 0.007823259687423706}[0m [36mstep[0m=[35m400000[0m
[2m2024-09-25 21:53.39[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_400000.d3[0m


Epoch 41/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:54.58[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=41 step=410000[0m [36mepoch[0m=[35m41[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002825063395500183, 'time_algorithm_update': 0.004900322675704956, 'loss': 0.003524913568829652, 'time_step': 0.007814176630973816}[0m [36mstep[0m=[35m410000[0m
[2m2024-09-25 21:54.58[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_410000.d3[0m


Epoch 42/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:56.17[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=42 step=420000[0m [36mepoch[0m=[35m42[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028248005151748656, 'time_algorithm_update': 0.004894343328475952, 'loss': 0.003522137886181008, 'time_step': 0.007807174205780029}[0m [36mstep[0m=[35m420000[0m
[2m2024-09-25 21:56.17[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_420000.d3[0m


Epoch 43/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:57.36[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=43 step=430000[0m [36mepoch[0m=[35m43[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002831966781616211, 'time_algorithm_update': 0.0049020752191543575, 'loss': 0.0035036241736263038, 'time_step': 0.007822805547714234}[0m [36mstep[0m=[35m430000[0m
[2m2024-09-25 21:57.36[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_430000.d3[0m


Epoch 44/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 21:58.54[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=44 step=440000[0m [36mepoch[0m=[35m44[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002824237608909607, 'time_algorithm_update': 0.0048987612247467045, 'loss': 0.003491367813432589, 'time_step': 0.007811228084564209}[0m [36mstep[0m=[35m440000[0m
[2m2024-09-25 21:58.54[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_440000.d3[0m


Epoch 45/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:00.13[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=45 step=450000[0m [36mepoch[0m=[35m45[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028185774326324464, 'time_algorithm_update': 0.004892246532440186, 'loss': 0.0034765986678074114, 'time_step': 0.007798540043830872}[0m [36mstep[0m=[35m450000[0m
[2m2024-09-25 22:00.13[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_450000.d3[0m


Epoch 46/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:01.31[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=46 step=460000[0m [36mepoch[0m=[35m46[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028168394327163695, 'time_algorithm_update': 0.004891562986373901, 'loss': 0.0034655735669424756, 'time_step': 0.007796366906166077}[0m [36mstep[0m=[35m460000[0m
[2m2024-09-25 22:01.31[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_460000.d3[0m


Epoch 47/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:02.50[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=47 step=470000[0m [36mepoch[0m=[35m47[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.0028265345573425292, 'time_algorithm_update': 0.004898201441764832, 'loss': 0.003442416319379117, 'time_step': 0.007813045048713683}[0m [36mstep[0m=[35m470000[0m
[2m2024-09-25 22:02.50[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_470000.d3[0m


Epoch 48/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:04.09[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=48 step=480000[0m [36mepoch[0m=[35m48[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002831237053871155, 'time_algorithm_update': 0.004895797252655029, 'loss': 0.0034384638297371567, 'time_step': 0.007815404343605042}[0m [36mstep[0m=[35m480000[0m
[2m2024-09-25 22:04.09[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_480000.d3[0m


Epoch 49/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:05.28[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=49 step=490000[0m [36mepoch[0m=[35m49[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002823265266418457, 'time_algorithm_update': 0.004897894525527954, 'loss': 0.003419490486430004, 'time_step': 0.007809411835670471}[0m [36mstep[0m=[35m490000[0m
[2m2024-09-25 22:05.28[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_490000.d3[0m


Epoch 50/50:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2024-09-25 22:06.46[0m [[32m[1minfo     [0m] [1mBC_20240925210111: epoch=50 step=500000[0m [36mepoch[0m=[35m50[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.002828282880783081, 'time_algorithm_update': 0.004894616913795471, 'loss': 0.003426086270972155, 'time_step': 0.0078120455503463745}[0m [36mstep[0m=[35m500000[0m
[2m2024-09-25 22:06.47[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/BC_20240925210111/model_500000.d3[0m


[(1,
  {'time_sample_batch': 0.0028660141229629515,
   'time_algorithm_update': 0.005022420930862427,
   'loss': 0.01563481689346954,
   'time_step': 0.007979437351226807}),
 (2,
  {'time_sample_batch': 0.002807782959938049,
   'time_algorithm_update': 0.004874714803695679,
   'loss': 0.008621751333330758,
   'time_step': 0.007771308946609497}),
 (3,
  {'time_sample_batch': 0.0028023838996887205,
   'time_algorithm_update': 0.004854305624961853,
   'loss': 0.00738925339858979,
   'time_step': 0.007745253396034241}),
 (4,
  {'time_sample_batch': 0.0028173617601394653,
   'time_algorithm_update': 0.004886588954925537,
   'loss': 0.0066413570678792895,
   'time_step': 0.007792285108566284}),
 (5,
  {'time_sample_batch': 0.0028092530012130737,
   'time_algorithm_update': 0.004891440606117249,
   'loss': 0.006154018088849261,
   'time_step': 0.007789110708236694}),
 (6,
  {'time_sample_batch': 0.0028082342863082884,
   'time_algorithm_update': 0.004899337792396545,
   'loss': 0.005777203653

In [None]:
!zip -r model_quarter_dataset.zip d3rlpy_logs

  adding: d3rlpy_logs/ (stored 0%)
  adding: d3rlpy_logs/BC_20240925210111/ (stored 0%)
  adding: d3rlpy_logs/BC_20240925210111/model_90000.d3 (deflated 33%)
  adding: d3rlpy_logs/BC_20240925210111/time_algorithm_update.csv (deflated 61%)
  adding: d3rlpy_logs/BC_20240925210111/time_step.csv (deflated 60%)
  adding: d3rlpy_logs/BC_20240925210111/model_30000.d3 (deflated 21%)
  adding: d3rlpy_logs/BC_20240925210111/model_320000.d3 (deflated 36%)
  adding: d3rlpy_logs/BC_20240925210111/model_440000.d3 (deflated 36%)
  adding: d3rlpy_logs/BC_20240925210111/model_340000.d3 (deflated 36%)
  adding: d3rlpy_logs/BC_20240925210111/model_250000.d3 (deflated 36%)
  adding: d3rlpy_logs/BC_20240925210111/model_160000.d3 (deflated 36%)
  adding: d3rlpy_logs/BC_20240925210111/model_290000.d3 (deflated 36%)
  adding: d3rlpy_logs/BC_20240925210111/model_130000.d3 (deflated 36%)
  adding: d3rlpy_logs/BC_20240925210111/model_500000.d3 (deflated 36%)
  adding: d3rlpy_logs/BC_20240925210111/model_140000.d