In [4]:
import numpy as np
import d3rlpy
from d3rlpy.datasets import get_d4rl
import gym
from d3rlpy.metrics.scorer import evaluate_on_environment
import argparse
import os


os.environ['D4RL_SUPPRESS_IMPORT_ERROR'] = '1'


  from .autonotebook import tqdm as notebook_tqdm


In [5]:
parser = argparse.ArgumentParser()
parser.add_argument("--task", type=str, help="task/game to be played")
parser.add_argument("--algo", type=str, help="algorithm to be used for training")
args = parser.parse_args()

task = args.task #['HalfCheetah-v4', 'Walker2d-v4', 'Ant-v4']
algo = args.algo
print(task)


usage: ipykernel_launcher.py [-h] [--task TASK] [--algo ALGO]
ipykernel_launcher.py: error: unrecognized arguments: --ip=127.0.0.1 --stdin=9008 --control=9006 --hb=9005 --Session.signature_scheme="hmac-sha256" --Session.key=b"9e8c588e-7ee3-41ea-b19d-c84d6b03e25b" --shell=9007 --transport="tcp" --iopub=9009 --f=/home/raunakk/.local/share/jupyter/runtime/kernel-v2-23007p03Mm7VoJj4E.json


SystemExit: 2

In [7]:
class MODEL():
    def __init__(self, task, algo, gpu=True):
        self.mean_results = []
        self.task = task 
        self.algo = algo
        self.f_params = {"use_gpu": gpu}
        self.engine = None

    def set_engine(self):
        if self.algo == "IQL":
            self.engine = d3rlpy.algos.IQL(**self.f_params)

        elif self.algo == "CQL":
            self.f_params["actor_learning_rate"] = 3e-5
            self.engine = d3rlpy.algos.CQL(**self.f_params)

        elif self.algo == "MOPO":
            self.engine = d3rlpy.algos.MOPO(**self.f_params)
            
        elif self.algo == "COMBO":
            self.engine = d3rlpy.algos.COMBO(**self.f_params)

    def train(self, n=100, n_steps=1000000, save_interval=100, save_metrics=False, verbose=False):
        dataset, env = get_d4rl(self.task)
        online_env = gym.make(self.task)
        for i in range(n):
            d3rlpy.seed(i)
            env.seed(i)
            online_env.seed(i)

            self.set_engine()

            self.engine.fit(dataset, n_steps=n_steps, save_interval=save_interval, save_metrics=save_metrics, verbose=verbose)
            self.engine.save_model("./saved_models/iql_{}_{}_{}.pt".format(algo, task, i))
            scorer = evaluate_on_environment(online_env, n_trials=100)
            self.mean_results.append(scorer(self.engine))
        return self.mean_results

In [8]:
task = "halfcheetah-medium-v2"
algo = "IQL"
model = MODEL(task, algo)
mean_results = model.train(n=1, n_steps=1)

load datafile: 100%|██████████| 21/21 [00:13<00:00,  1.53it/s]


2023-06-18 17:28:11 [debug    ] RandomIterator is selected.
2023-06-18 17:28:11 [debug    ] Building models...
2023-06-18 17:28:15 [debug    ] Models have been built.


Epoch 1/1: 100%|██████████| 1/1 [00:02<00:00,  2.93s/it, critic_loss=42.9, value_loss=0.129, actor_loss=46.4]


[-1.714704872094834]

In [13]:
import d3rlpy
from d3rlpy.datasets import get_d4rl
from d3rlpy.dynamics.base import DynamicsBase
import gym

task = "halfcheetah-medium-v2"
dataset, env = get_d4rl(task)
online_env = gym.make(task)

dynamics = DynamicsBase(batch_size=256, reward_scaler=None, n_frames=1, scaler=None, action_scaler=None, kwargs=None)
algo = d3rlpy.algos.MOPO(dynamics=dynamics)
algo.build_with_dataset(dataset)
algo.fit(dataset, n_epochs=2, n_steps_per_epoch=2)

load datafile: 100%|██████████| 21/21 [01:57<00:00,  5.60s/it]


2023-06-16 21:41:47 [debug    ] RoundIterator is selected.
2023-06-16 21:41:47 [info     ] Directory is created at d3rlpy_logs/MOPO_20230616214147
2023-06-16 21:41:48 [info     ] Parameters are saved to d3rlpy_logs/MOPO_20230616214147/params.json params={'action_scaler': None, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0003, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'batch_size': 100, 'critic_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'critic_learning_rate': 0.0003, 'critic_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'gamma': 0.99, 'generated_maxlen': 1250000, 'initial_temperature': 1.0, 'lam': 1.0, 'n_critics': 2, 'n_frames': 1, 'n_steps': 1, 'q_func_factory

Epoch 1/2:   0%|          | 0/9990 [00:03<?, ?it/s]


AssertionError: The neural network parameters are not initialized. Pleaes call build_with_dataset, build_with_env, or directly call fit or fit_online method.